diff --git a/api/auth/auth_project.py b/api/auth/auth_project.py index 98a495bbb..6f842916b 100644 --- a/api/auth/auth_project.py +++ b/api/auth/auth_project.py @@ -15,10 +15,12 @@ class ProjectAuthorizer: if len(request.path_params.keys()) == 0 or request.path_params.get(self.project_identifier) is None: return current_user: schemas.CurrentContext = await OR_context(request) - project_identifier = request.path_params[self.project_identifier] + value = request.path_params[self.project_identifier] if (self.project_identifier == "projectId" \ - and projects.get_project(project_id=project_identifier, tenant_id=current_user.tenant_id) is None) \ - or (self.project_identifier.lower() == "projectKey" \ - and projects.get_internal_project_id(project_key=project_identifier) is None): + and not (isinstance(value, int) or isinstance(value, str) and value.isnumeric()) + and projects.get_project(project_id=value, tenant_id=current_user.tenant_id) is None) \ + or (self.project_identifier == "projectKey" \ + and projects.get_internal_project_id(project_key=value) is None): print("project not found") + print(value) raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="project not found.") diff --git a/api/chalicelib/core/autocomplete.py b/api/chalicelib/core/autocomplete.py new file mode 100644 index 000000000..168cbbb83 --- /dev/null +++ b/api/chalicelib/core/autocomplete.py @@ -0,0 +1,104 @@ +import schemas +from chalicelib.utils import helper +from chalicelib.utils import pg_client +from chalicelib.utils.event_filter_definition import Event + +TABLE = "public.autocomplete" + + +def __get_autocomplete_table(value, project_id): + autocomplete_events = [schemas.FilterType.rev_id, + schemas.EventType.click, + schemas.FilterType.user_device, + schemas.FilterType.user_id, + schemas.FilterType.user_browser, + schemas.FilterType.user_os, + schemas.EventType.custom, + schemas.FilterType.user_country, + schemas.EventType.location, + schemas.EventType.input] + 
autocomplete_events.sort() + sub_queries = [] + for e in autocomplete_events: + sub_queries.append(f"""(SELECT type, value + FROM {TABLE} + WHERE project_id = %(project_id)s + AND type= '{e}' + AND value ILIKE %(svalue)s + LIMIT 5)""") + if len(value) > 2: + sub_queries.append(f"""(SELECT type, value + FROM {TABLE} + WHERE project_id = %(project_id)s + AND type= '{e}' + AND value ILIKE %(value)s + LIMIT 5)""") + with pg_client.PostgresClient() as cur: + query = cur.mogrify(" UNION DISTINCT ".join(sub_queries) + ";", + {"project_id": project_id, "value": helper.string_to_sql_like(value), + "svalue": helper.string_to_sql_like("^" + value)}) + try: + cur.execute(query) + except Exception as err: + print("--------- AUTOCOMPLETE SEARCH QUERY EXCEPTION -----------") + print(query.decode('UTF-8')) + print("--------- VALUE -----------") + print(value) + print("--------------------") + raise err + results = helper.list_to_camel_case(cur.fetchall()) + return results + + +def __generic_query(typename, value_length=None): + if value_length is None or value_length > 2: + return f"""(SELECT DISTINCT value, type + FROM {TABLE} + WHERE + project_id = %(project_id)s + AND type='{typename}' + AND value ILIKE %(svalue)s + ORDER BY value + LIMIT 5) + UNION DISTINCT + (SELECT DISTINCT value, type + FROM {TABLE} + WHERE + project_id = %(project_id)s + AND type='{typename}' + AND value ILIKE %(value)s + ORDER BY value + LIMIT 5);""" + return f"""SELECT DISTINCT value, type + FROM {TABLE} + WHERE + project_id = %(project_id)s + AND type='{typename}' + AND value ILIKE %(svalue)s + ORDER BY value + LIMIT 10;""" + + +def __generic_autocomplete(event: Event): + def f(project_id, value, key=None, source=None): + with pg_client.PostgresClient() as cur: + query = __generic_query(event.ui_type, value_length=len(value)) + params = {"project_id": project_id, "value": helper.string_to_sql_like(value), + "svalue": helper.string_to_sql_like("^" + value)} + cur.execute(cur.mogrify(query, params)) + 
return helper.list_to_camel_case(cur.fetchall()) + + return f + + +def __generic_autocomplete_metas(typename): + def f(project_id, text): + with pg_client.PostgresClient() as cur: + query = cur.mogrify(__generic_query(typename, value_length=len(text)), + {"project_id": project_id, "value": helper.string_to_sql_like(text), + "svalue": helper.string_to_sql_like("^" + text)}) + cur.execute(query) + rows = cur.fetchall() + return rows + + return f diff --git a/api/chalicelib/core/custom_metrics.py b/api/chalicelib/core/custom_metrics.py index e9e127c4e..29c4b6fa9 100644 --- a/api/chalicelib/core/custom_metrics.py +++ b/api/chalicelib/core/custom_metrics.py @@ -91,7 +91,7 @@ def __get_sessions_list(project_id, user_id, data): data.series[0].filter.endDate = data.endTimestamp data.series[0].filter.page = data.page data.series[0].filter.limit = data.limit - return sessions.search2_pg(data=data.series[0].filter, project_id=project_id, user_id=user_id) + return sessions.search_sessions(data=data.series[0].filter, project_id=project_id, user_id=user_id) def merged_live(project_id, data: schemas.TryCustomMetricsPayloadSchema, user_id=None): @@ -166,7 +166,7 @@ def get_sessions(project_id, user_id, metric_id, data: schemas.CustomMetricSessi s.filter.limit = data.limit s.filter.page = data.page results.append({"seriesId": s.series_id, "seriesName": s.name, - **sessions.search2_pg(data=s.filter, project_id=project_id, user_id=user_id)}) + **sessions.search_sessions(data=s.filter, project_id=project_id, user_id=user_id)}) return results @@ -213,7 +213,7 @@ def try_sessions(project_id, user_id, data: schemas.CustomMetricSessionsPayloadS s.filter.limit = data.limit s.filter.page = data.page results.append({"seriesId": None, "seriesName": s.name, - **sessions.search2_pg(data=s.filter, project_id=project_id, user_id=user_id)}) + **sessions.search_sessions(data=s.filter, project_id=project_id, user_id=user_id)}) return results @@ -532,7 +532,7 @@ def 
get_funnel_sessions_by_issue(user_id, project_id, metric_id, issue_id, "lostConversions": 0, "unaffectedSessions": 0} return {"seriesId": s.series_id, "seriesName": s.name, - "sessions": sessions.search2_pg(user_id=user_id, project_id=project_id, - issue=issue, data=s.filter) + "sessions": sessions.search_sessions(user_id=user_id, project_id=project_id, + issue=issue, data=s.filter) if issue is not None else {"total": 0, "sessions": []}, "issue": issue} diff --git a/api/chalicelib/core/errors.py b/api/chalicelib/core/errors.py index bbdea726b..c22a51445 100644 --- a/api/chalicelib/core/errors.py +++ b/api/chalicelib/core/errors.py @@ -251,10 +251,7 @@ def get_details(project_id, error_id, user_id, **data): parent_error_id,session_id, user_anonymous_id, user_id, user_uuid, user_browser, user_browser_version, user_os, user_os_version, user_device, payload, - COALESCE((SELECT TRUE - FROM public.user_favorite_errors AS fe - WHERE pe.error_id = fe.error_id - AND fe.user_id = %(user_id)s), FALSE) AS favorite, + FALSE AS favorite, True AS viewed FROM public.errors AS pe INNER JOIN events.errors AS ee USING (error_id) @@ -424,10 +421,11 @@ def __get_sort_key(key): }.get(key, 'max_datetime') -def search(data: schemas.SearchErrorsSchema, project_id, user_id, flows=False): - empty_response = {'total': 0, - 'errors': [] - } +def search(data: schemas.SearchErrorsSchema, project_id, user_id): + empty_response = { + 'total': 0, + 'errors': [] + } platform = None for f in data.filters: @@ -449,17 +447,12 @@ def search(data: schemas.SearchErrorsSchema, project_id, user_id, flows=False): data.endDate = TimeUTC.now(1) if len(data.events) > 0 or len(data.filters) > 0: print("-- searching for sessions before errors") - # if favorite_only=True search for sessions associated with favorite_error - statuses = sessions.search2_pg(data=data, project_id=project_id, user_id=user_id, errors_only=True, - error_status=data.status) + statuses = sessions.search_sessions(data=data, 
project_id=project_id, user_id=user_id, errors_only=True, + error_status=data.status) if len(statuses) == 0: return empty_response error_ids = [e["errorId"] for e in statuses] with pg_client.PostgresClient() as cur: - if data.startDate is None: - data.startDate = TimeUTC.now(-7) - if data.endDate is None: - data.endDate = TimeUTC.now() step_size = __get_step_size(data.startDate, data.endDate, data.density, factor=1) sort = __get_sort_key('datetime') if data.sort is not None: @@ -488,9 +481,9 @@ def search(data: schemas.SearchErrorsSchema, project_id, user_id, flows=False): if error_ids is not None: params["error_ids"] = tuple(error_ids) pg_sub_query.append("error_id IN %(error_ids)s") - if data.bookmarked: - pg_sub_query.append("ufe.user_id = %(userId)s") - extra_join += " INNER JOIN public.user_favorite_errors AS ufe USING (error_id)" + # if data.bookmarked: + # pg_sub_query.append("ufe.user_id = %(userId)s") + # extra_join += " INNER JOIN public.user_favorite_errors AS ufe USING (error_id)" if data.query is not None and len(data.query) > 0: pg_sub_query.append("(pe.name ILIKE %(error_query)s OR pe.message ILIKE %(error_query)s)") params["error_query"] = helper.values_for_operator(value=data.query, @@ -509,7 +502,7 @@ def search(data: schemas.SearchErrorsSchema, project_id, user_id, flows=False): FROM (SELECT error_id, name, message, - COUNT(DISTINCT user_uuid) AS users, + COUNT(DISTINCT COALESCE(user_id,user_uuid)) AS users, COUNT(DISTINCT session_id) AS sessions, MAX(timestamp) AS max_datetime, MIN(timestamp) AS min_datetime @@ -544,19 +537,13 @@ def search(data: schemas.SearchErrorsSchema, project_id, user_id, flows=False): cur.execute(cur.mogrify(main_pg_query, params)) rows = cur.fetchall() total = 0 if len(rows) == 0 else rows[0]["full_count"] - if flows: - return {"count": total} if total == 0: rows = [] else: if len(statuses) == 0: query = cur.mogrify( - """SELECT error_id, status, parent_error_id, payload, - COALESCE((SELECT TRUE - FROM 
public.user_favorite_errors AS fe - WHERE errors.error_id = fe.error_id - AND fe.user_id = %(user_id)s LIMIT 1), FALSE) AS favorite, + """SELECT error_id, COALESCE((SELECT TRUE FROM public.user_viewed_errors AS ve WHERE errors.error_id = ve.error_id @@ -574,26 +561,12 @@ def search(data: schemas.SearchErrorsSchema, project_id, user_id, flows=False): for r in rows: r.pop("full_count") if r["error_id"] in statuses: - r["status"] = statuses[r["error_id"]]["status"] - r["parent_error_id"] = statuses[r["error_id"]]["parentErrorId"] - r["favorite"] = statuses[r["error_id"]]["favorite"] r["viewed"] = statuses[r["error_id"]]["viewed"] - r["stack"] = format_first_stack_frame(statuses[r["error_id"]])["stack"] else: - r["status"] = "untracked" - r["parent_error_id"] = None - r["favorite"] = False r["viewed"] = False - r["stack"] = None - offset = len(rows) - rows = [r for r in rows if r["stack"] is None - or (len(r["stack"]) == 0 or len(r["stack"]) > 1 - or len(r["stack"]) > 0 - and (r["message"].lower() != "script error." 
or len(r["stack"][0]["absPath"]) > 0))] - offset -= len(rows) return { - 'total': total - offset, + 'total': total, 'errors': helper.list_to_camel_case(rows) } diff --git a/api/chalicelib/core/errors_favorite.py b/api/chalicelib/core/errors_favorite.py new file mode 100644 index 000000000..c9be88bcb --- /dev/null +++ b/api/chalicelib/core/errors_favorite.py @@ -0,0 +1,48 @@ +from chalicelib.utils import pg_client + + +def add_favorite_error(project_id, user_id, error_id): + with pg_client.PostgresClient() as cur: + cur.execute( + cur.mogrify(f"""INSERT INTO public.user_favorite_errors(user_id, error_id) + VALUES (%(userId)s,%(error_id)s);""", + {"userId": user_id, "error_id": error_id}) + ) + return {"errorId": error_id, "favorite": True} + + +def remove_favorite_error(project_id, user_id, error_id): + with pg_client.PostgresClient() as cur: + cur.execute( + cur.mogrify(f"""DELETE FROM public.user_favorite_errors + WHERE + user_id = %(userId)s + AND error_id = %(error_id)s;""", + {"userId": user_id, "error_id": error_id}) + ) + return {"errorId": error_id, "favorite": False} + + +def favorite_error(project_id, user_id, error_id): + exists, favorite = error_exists_and_favorite(user_id=user_id, error_id=error_id) + if not exists: + return {"errors": ["cannot bookmark non-rehydrated errors"]} + if favorite: + return remove_favorite_error(project_id=project_id, user_id=user_id, error_id=error_id) + return add_favorite_error(project_id=project_id, user_id=user_id, error_id=error_id) + + +def error_exists_and_favorite(user_id, error_id): + with pg_client.PostgresClient() as cur: + cur.execute( + cur.mogrify( + """SELECT errors.error_id AS exists, ufe.error_id AS favorite + FROM public.errors + LEFT JOIN (SELECT error_id FROM public.user_favorite_errors WHERE user_id = %(userId)s) AS ufe USING (error_id) + WHERE error_id = %(error_id)s;""", + {"userId": user_id, "error_id": error_id}) + ) + r = cur.fetchone() + if r is None: + return False, False + return True, 
r.get("favorite") is not None diff --git a/api/chalicelib/core/errors_favorite_viewed.py b/api/chalicelib/core/errors_favorite_viewed.py deleted file mode 100644 index 0bbc10b68..000000000 --- a/api/chalicelib/core/errors_favorite_viewed.py +++ /dev/null @@ -1,91 +0,0 @@ -from chalicelib.utils import pg_client - - -def add_favorite_error(project_id, user_id, error_id): - with pg_client.PostgresClient() as cur: - cur.execute( - cur.mogrify(f"""\ - INSERT INTO public.user_favorite_errors - (user_id, error_id) - VALUES - (%(userId)s,%(error_id)s);""", - {"userId": user_id, "error_id": error_id}) - ) - return {"errorId": error_id, "favorite": True} - - -def remove_favorite_error(project_id, user_id, error_id): - with pg_client.PostgresClient() as cur: - cur.execute( - cur.mogrify(f"""\ - DELETE FROM public.user_favorite_errors - WHERE - user_id = %(userId)s - AND error_id = %(error_id)s;""", - {"userId": user_id, "error_id": error_id}) - ) - return {"errorId": error_id, "favorite": False} - - -def favorite_error(project_id, user_id, error_id): - exists, favorite = error_exists_and_favorite(user_id=user_id, error_id=error_id) - if not exists: - return {"errors": ["cannot bookmark non-rehydrated errors"]} - if favorite: - return remove_favorite_error(project_id=project_id, user_id=user_id, error_id=error_id) - return add_favorite_error(project_id=project_id, user_id=user_id, error_id=error_id) - - -def error_exists_and_favorite(user_id, error_id): - with pg_client.PostgresClient() as cur: - cur.execute( - cur.mogrify( - """SELECT errors.error_id AS exists, ufe.error_id AS favorite - FROM public.errors - LEFT JOIN (SELECT error_id FROM public.user_favorite_errors WHERE user_id = %(userId)s) AS ufe USING (error_id) - WHERE error_id = %(error_id)s;""", - {"userId": user_id, "error_id": error_id}) - ) - r = cur.fetchone() - if r is None: - return False, False - return True, r.get("favorite") is not None - - -def add_viewed_error(project_id, user_id, error_id): - with 
pg_client.PostgresClient() as cur: - cur.execute( - cur.mogrify("""\ - INSERT INTO public.user_viewed_errors - (user_id, error_id) - VALUES - (%(userId)s,%(error_id)s);""", - {"userId": user_id, "error_id": error_id}) - ) - - -def viewed_error_exists(user_id, error_id): - with pg_client.PostgresClient() as cur: - query = cur.mogrify( - """SELECT - errors.error_id AS hydrated, - COALESCE((SELECT TRUE - FROM public.user_viewed_errors AS ve - WHERE ve.error_id = %(error_id)s - AND ve.user_id = %(userId)s LIMIT 1), FALSE) AS viewed - FROM public.errors - WHERE error_id = %(error_id)s""", - {"userId": user_id, "error_id": error_id}) - cur.execute( - query=query - ) - r = cur.fetchone() - if r: - return r.get("viewed") - return True - - -def viewed_error(project_id, user_id, error_id): - if viewed_error_exists(user_id=user_id, error_id=error_id): - return None - return add_viewed_error(project_id=project_id, user_id=user_id, error_id=error_id) diff --git a/api/chalicelib/core/errors_viewed.py b/api/chalicelib/core/errors_viewed.py new file mode 100644 index 000000000..f230358b4 --- /dev/null +++ b/api/chalicelib/core/errors_viewed.py @@ -0,0 +1,37 @@ +from chalicelib.utils import pg_client + + +def add_viewed_error(project_id, user_id, error_id): + with pg_client.PostgresClient() as cur: + cur.execute( + cur.mogrify("""INSERT INTO public.user_viewed_errors(user_id, error_id) + VALUES (%(userId)s,%(error_id)s);""", + {"userId": user_id, "error_id": error_id}) + ) + + +def viewed_error_exists(user_id, error_id): + with pg_client.PostgresClient() as cur: + query = cur.mogrify( + """SELECT + errors.error_id AS hydrated, + COALESCE((SELECT TRUE + FROM public.user_viewed_errors AS ve + WHERE ve.error_id = %(error_id)s + AND ve.user_id = %(userId)s LIMIT 1), FALSE) AS viewed + FROM public.errors + WHERE error_id = %(error_id)s""", + {"userId": user_id, "error_id": error_id}) + cur.execute( + query=query + ) + r = cur.fetchone() + if r: + return r.get("viewed") + return True + + 
+def viewed_error(project_id, user_id, error_id): + if viewed_error_exists(user_id=user_id, error_id=error_id): + return None + return add_viewed_error(project_id=project_id, user_id=user_id, error_id=error_id) diff --git a/api/chalicelib/core/events.py b/api/chalicelib/core/events.py index dd9562de1..e2c979799 100644 --- a/api/chalicelib/core/events.py +++ b/api/chalicelib/core/events.py @@ -1,10 +1,14 @@ import schemas from chalicelib.core import issues -from chalicelib.core import sessions_metas, metadata +from chalicelib.core import metadata +from chalicelib.core import sessions_metas + from chalicelib.utils import pg_client, helper from chalicelib.utils.TimeUTC import TimeUTC from chalicelib.utils.event_filter_definition import SupportedFilter, Event +from chalicelib.core import autocomplete + def get_customs_by_sessionId2_pg(session_id, project_id): with pg_client.PostgresClient() as cur: @@ -92,11 +96,6 @@ def get_by_sessionId2_pg(session_id, project_id, group_clickrage=False): return rows -def __get_data_for_extend(data): - if "errors" not in data: - return data["data"] - - def __pg_errors_query(source=None, value_length=None): if value_length is None or value_length > 2: return f"""((SELECT DISTINCT ON(lg.message) @@ -110,7 +109,7 @@ def __pg_errors_query(source=None, value_length=None): AND lg.project_id = %(project_id)s {"AND source = %(source)s" if source is not None else ""} LIMIT 5) - UNION ALL + UNION DISTINCT (SELECT DISTINCT ON(lg.name) lg.name AS value, source, @@ -122,7 +121,7 @@ def __pg_errors_query(source=None, value_length=None): AND lg.project_id = %(project_id)s {"AND source = %(source)s" if source is not None else ""} LIMIT 5) - UNION + UNION DISTINCT (SELECT DISTINCT ON(lg.message) lg.message AS value, source, @@ -134,7 +133,7 @@ def __pg_errors_query(source=None, value_length=None): AND lg.project_id = %(project_id)s {"AND source = %(source)s" if source is not None else ""} LIMIT 5) - UNION ALL + UNION DISTINCT (SELECT DISTINCT 
ON(lg.name) lg.name AS value, source, @@ -157,7 +156,7 @@ def __pg_errors_query(source=None, value_length=None): AND lg.project_id = %(project_id)s {"AND source = %(source)s" if source is not None else ""} LIMIT 5) - UNION ALL + UNION DISTINCT (SELECT DISTINCT ON(lg.name) lg.name AS value, source, @@ -177,8 +176,7 @@ def __search_pg_errors(project_id, value, key=None, source=None): with pg_client.PostgresClient() as cur: cur.execute( cur.mogrify(__pg_errors_query(source, - value_length=len(value) \ - if SUPPORTED_TYPES[event_type.ERROR.ui_type].change_by_length else None), + value_length=len(value)), {"project_id": project_id, "value": helper.string_to_sql_like(value), "svalue": helper.string_to_sql_like("^" + value), "source": source})) @@ -189,7 +187,7 @@ def __search_pg_errors(project_id, value, key=None, source=None): def __search_pg_errors_ios(project_id, value, key=None, source=None): now = TimeUTC.now() - if SUPPORTED_TYPES[event_type.ERROR_IOS.ui_type].change_by_length is False or len(value) > 2: + if len(value) > 2: query = f"""(SELECT DISTINCT ON(lg.reason) lg.reason AS value, '{event_type.ERROR_IOS.ui_type}' AS type @@ -268,7 +266,7 @@ def __search_pg_metadata(project_id, value, key=None, source=None): for k in meta_keys.keys(): colname = metadata.index_to_colname(meta_keys[k]) - if SUPPORTED_TYPES[event_type.METADATA.ui_type].change_by_length is False or len(value) > 2: + if len(value) > 2: sub_from.append(f"""((SELECT DISTINCT ON ({colname}) {colname} AS value, '{k}' AS key FROM public.sessions WHERE project_id = %(project_id)s @@ -294,48 +292,6 @@ def __search_pg_metadata(project_id, value, key=None, source=None): return results -def __generic_query(typename, value_length=None): - if value_length is None or value_length > 2: - return f"""(SELECT DISTINCT value, type - FROM public.autocomplete - WHERE - project_id = %(project_id)s - AND type='{typename}' - AND value ILIKE %(svalue)s - LIMIT 5) - UNION - (SELECT DISTINCT value, type - FROM 
public.autocomplete - WHERE - project_id = %(project_id)s - AND type='{typename}' - AND value ILIKE %(value)s - LIMIT 5);""" - return f"""SELECT DISTINCT value, type - FROM public.autocomplete - WHERE - project_id = %(project_id)s - AND type='{typename}' - AND value ILIKE %(svalue)s - LIMIT 10;""" - - -def __generic_autocomplete(event: Event): - def f(project_id, value, key=None, source=None): - with pg_client.PostgresClient() as cur: - cur.execute( - cur.mogrify( - __generic_query(event.ui_type, - value_length=len(value) \ - if SUPPORTED_TYPES[event.ui_type].change_by_length \ - else None), - {"project_id": project_id, "value": helper.string_to_sql_like(value), - "svalue": helper.string_to_sql_like("^" + value)})) - return helper.list_to_camel_case(cur.fetchall()) - - return f - - class event_type: CLICK = Event(ui_type=schemas.EventType.click, table="events.clicks", column="label") INPUT = Event(ui_type=schemas.EventType.input, table="events.inputs", column="label") @@ -358,99 +314,65 @@ class event_type: SUPPORTED_TYPES = { - event_type.CLICK.ui_type: SupportedFilter(get=__generic_autocomplete(event_type.CLICK), - query=__generic_query(typename=event_type.CLICK.ui_type), - change_by_length=True), - event_type.INPUT.ui_type: SupportedFilter(get=__generic_autocomplete(event_type.INPUT), - query=__generic_query(typename=event_type.INPUT.ui_type), - change_by_length=True), - event_type.LOCATION.ui_type: SupportedFilter(get=__generic_autocomplete(event_type.LOCATION), - query=__generic_query(typename=event_type.LOCATION.ui_type), - change_by_length=True), - event_type.CUSTOM.ui_type: SupportedFilter(get=__generic_autocomplete(event_type.CUSTOM), - query=__generic_query(typename=event_type.CUSTOM.ui_type), - change_by_length=True), - event_type.REQUEST.ui_type: SupportedFilter(get=__generic_autocomplete(event_type.REQUEST), - query=__generic_query(typename=event_type.REQUEST.ui_type), - change_by_length=True), - event_type.GRAPHQL.ui_type: 
SupportedFilter(get=__generic_autocomplete(event_type.GRAPHQL), - query=__generic_query(typename=event_type.GRAPHQL.ui_type), - change_by_length=True), - event_type.STATEACTION.ui_type: SupportedFilter(get=__generic_autocomplete(event_type.STATEACTION), - query=__generic_query(typename=event_type.STATEACTION.ui_type), - change_by_length=True), + event_type.CLICK.ui_type: SupportedFilter(get=autocomplete.__generic_autocomplete(event_type.CLICK), + query=autocomplete.__generic_query(typename=event_type.CLICK.ui_type)), + event_type.INPUT.ui_type: SupportedFilter(get=autocomplete.__generic_autocomplete(event_type.INPUT), + query=autocomplete.__generic_query(typename=event_type.INPUT.ui_type)), + event_type.LOCATION.ui_type: SupportedFilter(get=autocomplete.__generic_autocomplete(event_type.LOCATION), + query=autocomplete.__generic_query( + typename=event_type.LOCATION.ui_type)), + event_type.CUSTOM.ui_type: SupportedFilter(get=autocomplete.__generic_autocomplete(event_type.CUSTOM), + query=autocomplete.__generic_query(typename=event_type.CUSTOM.ui_type)), + event_type.REQUEST.ui_type: SupportedFilter(get=autocomplete.__generic_autocomplete(event_type.REQUEST), + query=autocomplete.__generic_query( + typename=event_type.REQUEST.ui_type)), + event_type.GRAPHQL.ui_type: SupportedFilter(get=autocomplete.__generic_autocomplete(event_type.GRAPHQL), + query=autocomplete.__generic_query( + typename=event_type.GRAPHQL.ui_type)), + event_type.STATEACTION.ui_type: SupportedFilter(get=autocomplete.__generic_autocomplete(event_type.STATEACTION), + query=autocomplete.__generic_query( + typename=event_type.STATEACTION.ui_type)), event_type.ERROR.ui_type: SupportedFilter(get=__search_pg_errors, - query=None, change_by_length=True), + query=None), event_type.METADATA.ui_type: SupportedFilter(get=__search_pg_metadata, - query=None, change_by_length=True), + query=None), # IOS - event_type.CLICK_IOS.ui_type: SupportedFilter(get=__generic_autocomplete(event_type.CLICK_IOS), - 
query=__generic_query(typename=event_type.CLICK_IOS.ui_type), - change_by_length=True), - event_type.INPUT_IOS.ui_type: SupportedFilter(get=__generic_autocomplete(event_type.INPUT_IOS), - query=__generic_query(typename=event_type.INPUT_IOS.ui_type), - change_by_length=True), - event_type.VIEW_IOS.ui_type: SupportedFilter(get=__generic_autocomplete(event_type.VIEW_IOS), - query=__generic_query(typename=event_type.VIEW_IOS.ui_type), - change_by_length=True), - event_type.CUSTOM_IOS.ui_type: SupportedFilter(get=__generic_autocomplete(event_type.CUSTOM_IOS), - query=__generic_query(typename=event_type.CUSTOM_IOS.ui_type), - change_by_length=True), - event_type.REQUEST_IOS.ui_type: SupportedFilter(get=__generic_autocomplete(event_type.REQUEST_IOS), - query=__generic_query(typename=event_type.REQUEST_IOS.ui_type), - change_by_length=True), + event_type.CLICK_IOS.ui_type: SupportedFilter(get=autocomplete.__generic_autocomplete(event_type.CLICK_IOS), + query=autocomplete.__generic_query( + typename=event_type.CLICK_IOS.ui_type)), + event_type.INPUT_IOS.ui_type: SupportedFilter(get=autocomplete.__generic_autocomplete(event_type.INPUT_IOS), + query=autocomplete.__generic_query( + typename=event_type.INPUT_IOS.ui_type)), + event_type.VIEW_IOS.ui_type: SupportedFilter(get=autocomplete.__generic_autocomplete(event_type.VIEW_IOS), + query=autocomplete.__generic_query( + typename=event_type.VIEW_IOS.ui_type)), + event_type.CUSTOM_IOS.ui_type: SupportedFilter(get=autocomplete.__generic_autocomplete(event_type.CUSTOM_IOS), + query=autocomplete.__generic_query( + typename=event_type.CUSTOM_IOS.ui_type)), + event_type.REQUEST_IOS.ui_type: SupportedFilter(get=autocomplete.__generic_autocomplete(event_type.REQUEST_IOS), + query=autocomplete.__generic_query( + typename=event_type.REQUEST_IOS.ui_type)), event_type.ERROR_IOS.ui_type: SupportedFilter(get=__search_pg_errors_ios, - query=None, change_by_length=True), + query=None), } -def __get_autocomplete_table(value, project_id): - 
autocomplete_events = [schemas.FilterType.rev_id, - schemas.EventType.click, - schemas.FilterType.user_device, - schemas.FilterType.user_id, - schemas.FilterType.user_browser, - schemas.FilterType.user_os, - schemas.EventType.custom, - schemas.FilterType.user_country, - schemas.EventType.location, - schemas.EventType.input] - autocomplete_events.sort() - sub_queries = [] - for e in autocomplete_events: - sub_queries.append(f"""(SELECT type, value - FROM public.autocomplete - WHERE project_id = %(project_id)s - AND type= '{e}' - AND value ILIKE %(svalue)s - LIMIT 5)""") - if len(value) > 2: - sub_queries.append(f"""(SELECT type, value - FROM public.autocomplete - WHERE project_id = %(project_id)s - AND type= '{e}' - AND value ILIKE %(value)s - LIMIT 5)""") +def get_errors_by_session_id(session_id, project_id): with pg_client.PostgresClient() as cur: - query = cur.mogrify(" UNION ".join(sub_queries) + ";", - {"project_id": project_id, "value": helper.string_to_sql_like(value), - "svalue": helper.string_to_sql_like("^" + value)}) - try: - cur.execute(query) - except Exception as err: - print("--------- AUTOCOMPLETE SEARCH QUERY EXCEPTION -----------") - print(query.decode('UTF-8')) - print("--------- VALUE -----------") - print(value) - print("--------------------") - raise err - results = helper.list_to_camel_case(cur.fetchall()) - return results + cur.execute(cur.mogrify(f"""\ + SELECT er.*,ur.*, er.timestamp - s.start_ts AS time + FROM {event_type.ERROR.table} AS er INNER JOIN public.errors AS ur USING (error_id) INNER JOIN public.sessions AS s USING (session_id) + WHERE er.session_id = %(session_id)s AND s.project_id=%(project_id)s + ORDER BY timestamp;""", {"session_id": session_id, "project_id": project_id})) + errors = cur.fetchall() + for e in errors: + e["stacktrace_parsed_at"] = TimeUTC.datetime_to_timestamp(e["stacktrace_parsed_at"]) + return helper.list_to_camel_case(errors) def search(text, event_type, project_id, source, key): if not event_type: - return 
{"data": __get_autocomplete_table(text, project_id)} + return {"data": autocomplete.__get_autocomplete_table(text, project_id)} if event_type in SUPPORTED_TYPES.keys(): rows = SUPPORTED_TYPES[event_type].get(project_id=project_id, value=text, key=key, source=source) @@ -470,16 +392,3 @@ def search(text, event_type, project_id, source, key): return {"errors": ["unsupported event"]} return {"data": rows} - - -def get_errors_by_session_id(session_id, project_id): - with pg_client.PostgresClient() as cur: - cur.execute(cur.mogrify(f"""\ - SELECT er.*,ur.*, er.timestamp - s.start_ts AS time - FROM {event_type.ERROR.table} AS er INNER JOIN public.errors AS ur USING (error_id) INNER JOIN public.sessions AS s USING (session_id) - WHERE er.session_id = %(session_id)s AND s.project_id=%(project_id)s - ORDER BY timestamp;""", {"session_id": session_id, "project_id": project_id})) - errors = cur.fetchall() - for e in errors: - e["stacktrace_parsed_at"] = TimeUTC.datetime_to_timestamp(e["stacktrace_parsed_at"]) - return helper.list_to_camel_case(errors) diff --git a/api/chalicelib/core/funnels.py b/api/chalicelib/core/funnels.py index 3239f4705..65cb7b09a 100644 --- a/api/chalicelib/core/funnels.py +++ b/api/chalicelib/core/funnels.py @@ -138,8 +138,8 @@ def get_by_user(project_id, user_id, range_value=None, start_date=None, end_date get_start_end_time(filter_d=row["filter"], range_value=range_value, start_date=start_date, end_date=end_date) - counts = sessions.search2_pg(data=schemas.SessionsSearchPayloadSchema.parse_obj(row["filter"]), - project_id=project_id, user_id=None, count_only=True) + counts = sessions.search_sessions(data=schemas.SessionsSearchPayloadSchema.parse_obj(row["filter"]), + project_id=project_id, user_id=None, count_only=True) row["sessionsCount"] = counts["countSessions"] row["usersCount"] = counts["countUsers"] filter_clone = dict(row["filter"]) @@ -193,8 +193,8 @@ def get_sessions(project_id, funnel_id, user_id, range_value=None, start_date=No if f is 
None: return {"errors": ["funnel not found"]} get_start_end_time(filter_d=f["filter"], range_value=range_value, start_date=start_date, end_date=end_date) - return sessions.search2_pg(data=schemas.SessionsSearchPayloadSchema.parse_obj(f["filter"]), project_id=project_id, - user_id=user_id) + return sessions.search_sessions(data=schemas.SessionsSearchPayloadSchema.parse_obj(f["filter"]), project_id=project_id, + user_id=user_id) def get_sessions_on_the_fly(funnel_id, project_id, user_id, data: schemas.FunnelSearchPayloadSchema): @@ -207,8 +207,8 @@ def get_sessions_on_the_fly(funnel_id, project_id, user_id, data: schemas.Funnel get_start_end_time(filter_d=f["filter"], range_value=data.range_value, start_date=data.startDate, end_date=data.endDate) data = schemas.FunnelSearchPayloadSchema.parse_obj(f["filter"]) - return sessions.search2_pg(data=data, project_id=project_id, - user_id=user_id) + return sessions.search_sessions(data=data, project_id=project_id, + user_id=user_id) def get_top_insights(project_id, user_id, funnel_id, range_value=None, start_date=None, end_date=None): @@ -365,8 +365,8 @@ def search_by_issue(user_id, project_id, funnel_id, issue_id, data: schemas.Funn if i.get("issueId", "") == issue_id: issue = i break - return {"sessions": sessions.search2_pg(user_id=user_id, project_id=project_id, issue=issue, - data=data) if issue is not None else {"total": 0, "sessions": []}, + return {"sessions": sessions.search_sessions(user_id=user_id, project_id=project_id, issue=issue, + data=data) if issue is not None else {"total": 0, "sessions": []}, # "stages": helper.list_to_camel_case(insights), # "totalDropDueToIssues": total_drop_due_to_issues, "issue": issue} diff --git a/api/chalicelib/core/metrics.py b/api/chalicelib/core/metrics.py index 2aaaeb1d9..5d987e485 100644 --- a/api/chalicelib/core/metrics.py +++ b/api/chalicelib/core/metrics.py @@ -765,8 +765,8 @@ def get_missing_resources_trend(project_id, startTimestamp=TimeUTC.now(delta_day 
pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=True, chart=True, data=args) pg_sub_query.append("resources.success = FALSE") pg_sub_query_chart.append("resources.success = FALSE") - pg_sub_query.append("resources.type != 'fetch'") - pg_sub_query_chart.append("resources.type != 'fetch'") + pg_sub_query.append("resources.type = 'img'") + pg_sub_query_chart.append("resources.type = 'img'") with pg_client.PostgresClient() as cur: pg_query = f"""SELECT @@ -1580,27 +1580,27 @@ def get_domains_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), step_size = __get_step_size(startTimestamp, endTimestamp, density, factor=1) pg_sub_query_subset = __get_constraints(project_id=project_id, time_constraint=True, chart=False, data=args) pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=False, chart=True, - data=args, main_table="resources", time_column="timestamp", project=False, + data=args, main_table="requests", time_column="timestamp", project=False, duration=False) - pg_sub_query_subset.append("resources.timestamp>=%(startTimestamp)s") - pg_sub_query_subset.append("resources.timestamp<%(endTimestamp)s") - pg_sub_query_subset.append("resources.status/100 = %(status_code)s") + pg_sub_query_subset.append("requests.timestamp>=%(startTimestamp)s") + pg_sub_query_subset.append("requests.timestamp<%(endTimestamp)s") + pg_sub_query_subset.append("requests.status/100 = %(status_code)s") with pg_client.PostgresClient() as cur: - pg_query = f"""WITH resources AS(SELECT resources.url_host, timestamp - FROM events.resources INNER JOIN public.sessions USING (session_id) + pg_query = f"""WITH requests AS(SELECT requests.host, timestamp + FROM events_common.requests INNER JOIN public.sessions USING (session_id) WHERE {" AND ".join(pg_sub_query_subset)} ) SELECT generated_timestamp AS timestamp, - COALESCE(JSONB_AGG(resources) FILTER ( WHERE resources IS NOT NULL ), '[]'::JSONB) AS keys + COALESCE(JSONB_AGG(requests) FILTER ( 
WHERE requests IS NOT NULL ), '[]'::JSONB) AS keys FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp - LEFT JOIN LATERAL ( SELECT resources.url_host, COUNT(resources.*) AS count - FROM resources + LEFT JOIN LATERAL ( SELECT requests.host, COUNT(*) AS count + FROM requests WHERE {" AND ".join(pg_sub_query_chart)} - GROUP BY url_host + GROUP BY host ORDER BY count DESC LIMIT 5 - ) AS resources ON (TRUE) + ) AS requests ON (TRUE) GROUP BY generated_timestamp ORDER BY generated_timestamp;""" params = {"project_id": project_id, @@ -1625,37 +1625,37 @@ def get_domains_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), return result -def get_domains_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), - endTimestamp=TimeUTC.now(), density=6, **args): +def __get_domains_errors_4xx_and_5xx(status, project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=6, **args): step_size = __get_step_size(startTimestamp, endTimestamp, density, factor=1) pg_sub_query_subset = __get_constraints(project_id=project_id, time_constraint=True, chart=False, data=args) pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=False, chart=True, - data=args, main_table="resources", time_column="timestamp", project=False, + data=args, main_table="requests", time_column="timestamp", project=False, duration=False) - pg_sub_query_subset.append("resources.status/100 = %(status_code)s") + pg_sub_query_subset.append("requests.status/100 = %(status_code)s") with pg_client.PostgresClient() as cur: - pg_query = f"""WITH resources AS (SELECT resources.url_host, timestamp - FROM events.resources INNER JOIN public.sessions USING (session_id) + pg_query = f"""WITH requests AS (SELECT host, timestamp + FROM events_common.requests INNER JOIN public.sessions USING (session_id) WHERE {" AND ".join(pg_sub_query_subset)} ) SELECT generated_timestamp AS timestamp, - 
COALESCE(JSONB_AGG(resources) FILTER ( WHERE resources IS NOT NULL ), '[]'::JSONB) AS keys + COALESCE(JSONB_AGG(requests) FILTER ( WHERE requests IS NOT NULL ), '[]'::JSONB) AS keys FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp - LEFT JOIN LATERAL ( SELECT resources.url_host, COUNT(resources.url_host) AS count - FROM resources + LEFT JOIN LATERAL ( SELECT requests.host, COUNT(*) AS count + FROM requests WHERE {" AND ".join(pg_sub_query_chart)} - GROUP BY url_host + GROUP BY host ORDER BY count DESC LIMIT 5 - ) AS resources ON (TRUE) + ) AS requests ON (TRUE) GROUP BY generated_timestamp ORDER BY generated_timestamp;""" params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, "step_size": step_size, - "status_code": 4, **__get_constraint_values(args)} + "status_code": status, **__get_constraint_values(args)} cur.execute(cur.mogrify(pg_query, params)) rows = cur.fetchall() rows = __nested_array_to_dict_array(rows) @@ -1665,44 +1665,16 @@ def get_domains_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1) return rows +def get_domains_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=6, **args): + return __get_domains_errors_4xx_and_5xx(status=4, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, density=density, **args) + + def get_domains_errors_5xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), density=6, **args): - step_size = __get_step_size(startTimestamp, endTimestamp, density, factor=1) - pg_sub_query_subset = __get_constraints(project_id=project_id, time_constraint=True, chart=False, data=args) - pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=False, chart=True, - data=args, main_table="resources", time_column="timestamp", project=False, - duration=False) - pg_sub_query_subset.append("resources.status/100 
= %(status_code)s") - - with pg_client.PostgresClient() as cur: - pg_query = f"""WITH resources AS (SELECT resources.url_host, timestamp - FROM events.resources INNER JOIN public.sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query_subset)} - ) - SELECT generated_timestamp AS timestamp, - COALESCE(JSONB_AGG(resources) FILTER ( WHERE resources IS NOT NULL ), '[]'::JSONB) AS keys - FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp - LEFT JOIN LATERAL ( SELECT resources.url_host, COUNT(resources.url_host) AS count - FROM resources - WHERE {" AND ".join(pg_sub_query_chart)} - GROUP BY url_host - ORDER BY count DESC - LIMIT 5 - ) AS resources ON (TRUE) - GROUP BY generated_timestamp - ORDER BY generated_timestamp;""" - params = {"project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, - "step_size": step_size, - "status_code": 5, **__get_constraint_values(args)} - cur.execute(cur.mogrify(pg_query, params)) - rows = cur.fetchall() - rows = __nested_array_to_dict_array(rows) - neutral = __get_neutral(rows) - rows = __merge_rows_with_neutral(rows, neutral) - - return rows + return __get_domains_errors_4xx_and_5xx(status=5, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, density=density, **args) def __nested_array_to_dict_array(rows, key="url_host", value="count"): @@ -1747,15 +1719,15 @@ def get_slowest_domains(project_id, startTimestamp=TimeUTC.now(delta_days=-1), def get_errors_per_domains(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), **args): pg_sub_query = __get_constraints(project_id=project_id, data=args) - pg_sub_query.append("resources.success = FALSE") + pg_sub_query.append("requests.success = FALSE") with pg_client.PostgresClient() as cur: pg_query = f"""SELECT - resources.url_host AS domain, - COUNT(resources.session_id) AS errors_count - FROM events.resources INNER JOIN sessions USING (session_id) 
+ requests.host AS domain, + COUNT(requests.session_id) AS errors_count + FROM events_common.requests INNER JOIN sessions USING (session_id) WHERE {" AND ".join(pg_sub_query)} - GROUP BY resources.url_host + GROUP BY requests.host ORDER BY errors_count DESC LIMIT 5;""" cur.execute(cur.mogrify(pg_query, {"project_id": project_id, @@ -1823,7 +1795,7 @@ def get_calls_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endT FROM events.resources INNER JOIN sessions USING (session_id) WHERE {" AND ".join(pg_sub_query)} GROUP BY resources.method, resources.url_hostpath - ORDER BY (4 + 5), 3 DESC + ORDER BY (4 + 5) DESC, 3 DESC LIMIT 50;""" cur.execute(cur.mogrify(pg_query, {"project_id": project_id, "startTimestamp": startTimestamp, @@ -1832,50 +1804,45 @@ def get_calls_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endT return helper.list_to_camel_case(rows) -def get_calls_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), - platform=None, **args): +def __get_calls_errors_4xx_or_5xx(status, project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), + platform=None, **args): pg_sub_query = __get_constraints(project_id=project_id, data=args) - pg_sub_query.append("resources.type = 'fetch'") - pg_sub_query.append("resources.method IS NOT NULL") - pg_sub_query.append("resources.status/100 = 4") + pg_sub_query.append("requests.type = 'fetch'") + pg_sub_query.append("requests.method IS NOT NULL") + pg_sub_query.append(f"requests.status/100 = {status}") with pg_client.PostgresClient() as cur: - pg_query = f"""SELECT resources.method, - resources.url_hostpath, - COUNT(resources.session_id) AS all_requests - FROM events.resources INNER JOIN sessions USING (session_id) + pg_query = f"""SELECT requests.method, + requests.host, + requests.path, + COUNT(requests.session_id) AS all_requests + FROM events_common.requests INNER JOIN sessions USING (session_id) WHERE {" AND 
".join(pg_sub_query)} - GROUP BY resources.method, resources.url_hostpath + GROUP BY requests.method, requests.host, requests.path ORDER BY all_requests DESC LIMIT 10;""" cur.execute(cur.mogrify(pg_query, {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, **__get_constraint_values(args)})) rows = cur.fetchall() + for r in rows: + r["url_hostpath"] = r.pop("host") + r.pop("path") return helper.list_to_camel_case(rows) +def get_calls_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), + platform=None, **args): + return __get_calls_errors_4xx_or_5xx(status=4, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, + platform=platform, **args) + + def get_calls_errors_5xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), platform=None, **args): - pg_sub_query = __get_constraints(project_id=project_id, data=args) - pg_sub_query.append("resources.type = 'fetch'") - pg_sub_query.append("resources.method IS NOT NULL") - pg_sub_query.append("resources.status/100 = 5") - - with pg_client.PostgresClient() as cur: - pg_query = f"""SELECT resources.method, - resources.url_hostpath, - COUNT(resources.session_id) AS all_requests - FROM events.resources INNER JOIN sessions USING (session_id) - WHERE {" AND ".join(pg_sub_query)} - GROUP BY resources.method, resources.url_hostpath - ORDER BY all_requests DESC - LIMIT 10;""" - cur.execute(cur.mogrify(pg_query, {"project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)})) - rows = cur.fetchall() - return helper.list_to_camel_case(rows) + return __get_calls_errors_4xx_or_5xx(status=5, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, + platform=platform, **args) def get_errors_per_type(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), @@ -1883,10 +1850,9 @@ def 
get_errors_per_type(project_id, startTimestamp=TimeUTC.now(delta_days=-1), e step_size = __get_step_size(startTimestamp, endTimestamp, density, factor=1) pg_sub_query_subset = __get_constraints(project_id=project_id, data=args) - pg_sub_query_subset.append("resources.timestamp>=%(startTimestamp)s") - pg_sub_query_subset.append("resources.timestamp<%(endTimestamp)s") - pg_sub_query_subset.append("resources.type != 'fetch'") - pg_sub_query_subset.append("resources.status > 200") + pg_sub_query_subset.append("requests.timestamp>=%(startTimestamp)s") + pg_sub_query_subset.append("requests.timestamp<%(endTimestamp)s") + pg_sub_query_subset.append("requests.status_code > 200") pg_sub_query_subset_e = __get_constraints(project_id=project_id, data=args, duration=False, main_table="m_errors", time_constraint=False) @@ -1897,8 +1863,8 @@ def get_errors_per_type(project_id, startTimestamp=TimeUTC.now(delta_days=-1), e pg_sub_query_subset_e.append("timestamp<%(endTimestamp)s") with pg_client.PostgresClient() as cur: - pg_query = f"""WITH resources AS (SELECT status, timestamp - FROM events.resources + pg_query = f"""WITH requests AS (SELECT status_code AS status, timestamp + FROM events_common.requests INNER JOIN public.sessions USING (session_id) WHERE {" AND ".join(pg_sub_query_subset)} ), @@ -1927,7 +1893,7 @@ def get_errors_per_type(project_id, startTimestamp=TimeUTC.now(delta_days=-1), e ), 0) AS integrations FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp LEFT JOIN LATERAL (SELECT status - FROM resources + FROM requests WHERE {" AND ".join(pg_sub_query_chart)} ) AS errors_partition ON (TRUE) GROUP BY timestamp @@ -2169,44 +2135,44 @@ def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1) pg_sub_query_subset = __get_constraints(project_id=project_id, time_constraint=True, chart=False, data=args) pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=False, project=False, - 
chart=True, data=args, main_table="resources", time_column="timestamp", + chart=True, data=args, main_table="requests", time_column="timestamp", duration=False) - pg_sub_query_subset.append("resources.timestamp >= %(startTimestamp)s") - pg_sub_query_subset.append("resources.timestamp < %(endTimestamp)s") - pg_sub_query_subset.append("resources.success = FALSE") + pg_sub_query_subset.append("requests.timestamp >= %(startTimestamp)s") + pg_sub_query_subset.append("requests.timestamp < %(endTimestamp)s") + # pg_sub_query_subset.append("resources.type IN ('fetch', 'script')") + pg_sub_query_subset.append("requests.success = FALSE") with pg_client.PostgresClient() as cur: - pg_query = f"""WITH resources AS ( - SELECT resources.url_host, timestamp - FROM events.resources + pg_query = f"""WITH requests AS ( + SELECT requests.host, timestamp + FROM events_common.requests INNER JOIN public.sessions USING (session_id) WHERE {" AND ".join(pg_sub_query_subset)} ) SELECT generated_timestamp AS timestamp, - SUM(CASE WHEN first.url_host = sub_resources.url_host THEN 1 ELSE 0 END) AS first_party, - SUM(CASE WHEN first.url_host != sub_resources.url_host THEN 1 ELSE 0 END) AS third_party + SUM(CASE WHEN first.host = sub_requests.host THEN 1 ELSE 0 END) AS first_party, + SUM(CASE WHEN first.host != sub_requests.host THEN 1 ELSE 0 END) AS third_party FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp LEFT JOIN ( - SELECT resources.url_host, - COUNT(resources.session_id) AS count - FROM events.resources + SELECT requests.host, + COUNT(requests.session_id) AS count + FROM events_common.requests INNER JOIN public.sessions USING (session_id) WHERE sessions.project_id = '1' - AND resources.type IN ('fetch', 'script') AND sessions.start_ts > (EXTRACT(EPOCH FROM now() - INTERVAL '31 days') * 1000)::BIGINT AND sessions.start_ts < (EXTRACT(EPOCH FROM now()) * 1000)::BIGINT - AND resources.timestamp > (EXTRACT(EPOCH FROM now() - INTERVAL '31 days') 
* 1000)::BIGINT - AND resources.timestamp < (EXTRACT(EPOCH FROM now()) * 1000)::BIGINT + AND requests.timestamp > (EXTRACT(EPOCH FROM now() - INTERVAL '31 days') * 1000)::BIGINT + AND requests.timestamp < (EXTRACT(EPOCH FROM now()) * 1000)::BIGINT AND sessions.duration>0 - GROUP BY resources.url_host + GROUP BY requests.host ORDER BY count DESC LIMIT 1 ) AS first ON (TRUE) LEFT JOIN LATERAL ( - SELECT resources.url_host - FROM resources + SELECT requests.host + FROM requests WHERE {" AND ".join(pg_sub_query_chart)} - ) AS sub_resources ON (TRUE) + ) AS sub_requests ON (TRUE) GROUP BY generated_timestamp ORDER BY generated_timestamp;""" cur.execute(cur.mogrify(pg_query, {"step_size": step_size, diff --git a/api/chalicelib/core/projects.py b/api/chalicelib/core/projects.py index 0893f6259..100fe6765 100644 --- a/api/chalicelib/core/projects.py +++ b/api/chalicelib/core/projects.py @@ -43,25 +43,53 @@ def __create(tenant_id, name): def get_projects(tenant_id, recording_state=False, gdpr=None, recorded=False, stack_integrations=False): with pg_client.PostgresClient() as cur: - recorded_q = "" + extra_projection = "" + extra_join = "" + if gdpr: + extra_projection += ',s.gdpr' if recorded: - recorded_q = """, COALESCE((SELECT TRUE - FROM public.sessions - WHERE sessions.project_id = s.project_id - AND sessions.start_ts >= (EXTRACT(EPOCH FROM s.created_at) * 1000 - 24 * 60 * 60 * 1000) - AND sessions.start_ts <= %(now)s - LIMIT 1), FALSE) AS recorded""" - query = cur.mogrify(f"""SELECT - s.project_id, s.name, s.project_key, s.save_request_payloads - {',s.gdpr' if gdpr else ''} - {recorded_q} - {',stack_integrations.count>0 AS stack_integrations' if stack_integrations else ''} + extra_projection += """, COALESCE(nullif(EXTRACT(EPOCH FROM s.first_recorded_session_at) * 1000, NULL)::BIGINT, + (SELECT MIN(sessions.start_ts) + FROM public.sessions + WHERE sessions.project_id = s.project_id + AND sessions.start_ts >= (EXTRACT(EPOCH FROM + COALESCE(s.sessions_last_check_at, 
s.created_at)) * 1000-24*60*60*1000) + AND sessions.start_ts <= %(now)s + LIMIT 1), NULL) AS first_recorded""" + if stack_integrations: + extra_projection += ',stack_integrations.count>0 AS stack_integrations' + + if stack_integrations: + extra_join = """LEFT JOIN LATERAL (SELECT COUNT(*) AS count + FROM public.integrations + WHERE s.project_id = integrations.project_id + LIMIT 1) AS stack_integrations ON TRUE""" + + query = cur.mogrify(f"""{"SELECT *, first_recorded IS NOT NULL AS recorded FROM (" if recorded else ""} + SELECT s.project_id, s.name, s.project_key, s.save_request_payloads, s.first_recorded_session_at + {extra_projection} FROM public.projects AS s - {'LEFT JOIN LATERAL (SELECT COUNT(*) AS count FROM public.integrations WHERE s.project_id = integrations.project_id LIMIT 1) AS stack_integrations ON TRUE' if stack_integrations else ''} + {extra_join} WHERE s.deleted_at IS NULL - ORDER BY s.project_id;""", {"now": TimeUTC.now()}) + ORDER BY s.project_id {") AS raw" if recorded else ""};""", {"now": TimeUTC.now()}) cur.execute(query) rows = cur.fetchall() + # if recorded is requested, check if it was saved or computed + if recorded: + for r in rows: + if r["first_recorded_session_at"] is None: + extra_update = "" + if r["recorded"]: + extra_update = ", first_recorded_session_at=to_timestamp(%(first_recorded)s/1000)" + query = cur.mogrify(f"""UPDATE public.projects + SET sessions_last_check_at=(now() at time zone 'utc') + {extra_update} + WHERE project_id=%(project_id)s""", + {"project_id": r["project_id"], "first_recorded": r["first_recorded"]}) + cur.execute(query) + r.pop("first_recorded_session_at") + r.pop("first_recorded") + if recording_state: project_ids = [f'({r["project_id"]})' for r in rows] query = cur.mogrify(f"""SELECT projects.project_id, COALESCE(MAX(start_ts), 0) AS last diff --git a/api/chalicelib/core/sessions.py b/api/chalicelib/core/sessions.py index c044a5819..e5a704298 100644 --- a/api/chalicelib/core/sessions.py +++ 
b/api/chalicelib/core/sessions.py @@ -2,7 +2,7 @@ from typing import List import schemas from chalicelib.core import events, metadata, events_ios, \ - sessions_mobs, issues, projects, errors, resources, assist, performance_event + sessions_mobs, issues, projects, errors, resources, assist, performance_event, sessions_viewed, sessions_favorite from chalicelib.utils import pg_client, helper, metrics_helper SESSION_PROJECTION_COLS = """s.project_id, @@ -172,8 +172,12 @@ def _isUndefined_operator(op: schemas.SearchEventOperator): return op in [schemas.SearchEventOperator._is_undefined] -def search2_pg(data: schemas.SessionsSearchPayloadSchema, project_id, user_id, errors_only=False, - error_status=schemas.ErrorStatus.all, count_only=False, issue=None): +# This function executes the query and return result +def search_sessions(data: schemas.SessionsSearchPayloadSchema, project_id, user_id, errors_only=False, + error_status=schemas.ErrorStatus.all, count_only=False, issue=None): + if data.bookmarked: + data.startDate,data.endDate = sessions_favorite.get_start_end_timestamp(project_id,user_id) + full_args, query_part = search_query_parts(data=data, error_status=error_status, errors_only=errors_only, favorite_only=data.bookmarked, issue=issue, project_id=project_id, user_id=user_id) @@ -187,16 +191,12 @@ def search2_pg(data: schemas.SessionsSearchPayloadSchema, project_id, user_id, e meta_keys = [] with pg_client.PostgresClient() as cur: if errors_only: - main_query = cur.mogrify(f"""SELECT DISTINCT er.error_id, ser.status, ser.parent_error_id, ser.payload, - COALESCE((SELECT TRUE - FROM public.user_favorite_sessions AS fs - WHERE s.session_id = fs.session_id - AND fs.user_id = %(userId)s), FALSE) AS favorite, - COALESCE((SELECT TRUE + main_query = cur.mogrify(f"""SELECT DISTINCT er.error_id, + COALESCE((SELECT TRUE FROM public.user_viewed_errors AS ve WHERE er.error_id = ve.error_id AND ve.user_id = %(userId)s LIMIT 1), FALSE) AS viewed - {query_part};""", full_args) + 
{query_part};""", full_args) elif count_only: main_query = cur.mogrify(f"""SELECT COUNT(DISTINCT s.session_id) AS count_sessions, @@ -401,6 +401,7 @@ def __is_valid_event(is_any: bool, event: schemas._SessionSearchEventSchema): event.filters is None or len(event.filters) == 0)) +# this function generates the query and return the generated-query with the dict of query arguments def search_query_parts(data, error_status, errors_only, favorite_only, issue, project_id, user_id, extra_event=None): ss_constraints = [] full_args = {"project_id": project_id, "startDate": data.startDate, "endDate": data.endDate, @@ -717,7 +718,7 @@ def search_query_parts(data, error_status, errors_only, favorite_only, issue, pr event_where.append( _multiple_conditions(f"(main1.message {op} %({e_k})s OR main1.name {op} %({e_k})s)", event.value, value_key=e_k)) - if event.source[0] not in [None, "*", ""]: + if len(event.source) > 0 and event.source[0] not in [None, "*", ""]: event_where.append(_multiple_conditions(f"main1.source = %({s_k})s", event.source, value_key=s_k)) @@ -989,13 +990,13 @@ def search_query_parts(data, error_status, errors_only, favorite_only, issue, pr extra_from += f" INNER JOIN {events.event_type.ERROR.table} AS er USING (session_id) INNER JOIN public.errors AS ser USING (error_id)" extra_constraints.append("ser.source = 'js_exception'") extra_constraints.append("ser.project_id = %(project_id)s") - if error_status != schemas.ErrorStatus.all: - extra_constraints.append("ser.status = %(error_status)s") - full_args["error_status"] = error_status - if favorite_only: - extra_from += " INNER JOIN public.user_favorite_errors AS ufe USING (error_id)" - extra_constraints.append("ufe.user_id = %(userId)s") - # extra_constraints = [extra.decode('UTF-8') + "\n" for extra in extra_constraints] + # if error_status != schemas.ErrorStatus.all: + # extra_constraints.append("ser.status = %(error_status)s") + # full_args["error_status"] = error_status + # if favorite_only: + # extra_from 
+= " INNER JOIN public.user_favorite_errors AS ufe USING (error_id)" + # extra_constraints.append("ufe.user_id = %(userId)s") + if favorite_only and not errors_only and user_id is not None: extra_from += """INNER JOIN (SELECT user_id, session_id FROM public.user_favorite_sessions diff --git a/api/chalicelib/core/sessions_favorite_viewed.py b/api/chalicelib/core/sessions_favorite.py similarity index 68% rename from api/chalicelib/core/sessions_favorite_viewed.py rename to api/chalicelib/core/sessions_favorite.py index 7f503679c..728ba649d 100644 --- a/api/chalicelib/core/sessions_favorite_viewed.py +++ b/api/chalicelib/core/sessions_favorite.py @@ -6,10 +6,8 @@ def add_favorite_session(project_id, user_id, session_id): with pg_client.PostgresClient() as cur: cur.execute( cur.mogrify(f"""\ - INSERT INTO public.user_favorite_sessions - (user_id, session_id) - VALUES - (%(userId)s,%(sessionId)s);""", + INSERT INTO public.user_favorite_sessions(user_id, session_id) + VALUES (%(userId)s,%(sessionId)s);""", {"userId": user_id, "sessionId": session_id}) ) return sessions.get_by_id2_pg(project_id=project_id, session_id=session_id, user_id=user_id, full_data=False, @@ -21,8 +19,7 @@ def remove_favorite_session(project_id, user_id, session_id): cur.execute( cur.mogrify(f"""\ DELETE FROM public.user_favorite_sessions - WHERE - user_id = %(userId)s + WHERE user_id = %(userId)s AND session_id = %(sessionId)s;""", {"userId": user_id, "sessionId": session_id}) ) @@ -30,19 +27,6 @@ def remove_favorite_session(project_id, user_id, session_id): include_fav_viewed=True) -def add_viewed_session(project_id, user_id, session_id): - with pg_client.PostgresClient() as cur: - cur.execute( - cur.mogrify("""\ - INSERT INTO public.user_viewed_sessions - (user_id, session_id) - VALUES - (%(userId)s,%(sessionId)s) - ON CONFLICT DO NOTHING;""", - {"userId": user_id, "sessionId": session_id}) - ) - - def favorite_session(project_id, user_id, session_id): if favorite_session_exists(user_id=user_id, 
session_id=session_id): return remove_favorite_session(project_id=project_id, user_id=user_id, session_id=session_id) @@ -50,16 +34,11 @@ def favorite_session(project_id, user_id, session_id): return add_favorite_session(project_id=project_id, user_id=user_id, session_id=session_id) -def view_session(project_id, user_id, session_id): - return add_viewed_session(project_id=project_id, user_id=user_id, session_id=session_id) - - def favorite_session_exists(user_id, session_id): with pg_client.PostgresClient() as cur: cur.execute( cur.mogrify( - """SELECT - session_id + """SELECT session_id FROM public.user_favorite_sessions WHERE user_id = %(userId)s @@ -68,3 +47,18 @@ def favorite_session_exists(user_id, session_id): ) r = cur.fetchone() return r is not None + + +def get_start_end_timestamp(project_id, user_id): + with pg_client.PostgresClient() as cur: + cur.execute( + cur.mogrify( + """SELECT max(start_ts) AS max_start_ts, min(start_ts) AS min_start_ts + FROM public.user_favorite_sessions INNER JOIN sessions USING(session_id) + WHERE + user_favorite_sessions.user_id = %(userId)s + AND project_id = %(project_id)s;""", + {"userId": user_id, "project_id": project_id}) + ) + r = cur.fetchone() + return 0, 0 if r is None else r["max_start_ts"], r["min_start_ts"] diff --git a/api/chalicelib/core/sessions_metas.py b/api/chalicelib/core/sessions_metas.py index 07aad2ee4..f7e98eb69 100644 --- a/api/chalicelib/core/sessions_metas.py +++ b/api/chalicelib/core/sessions_metas.py @@ -1,206 +1,66 @@ import schemas -from chalicelib.utils import pg_client, helper +from chalicelib.core import autocomplete from chalicelib.utils.event_filter_definition import SupportedFilter - -def get_key_values(project_id): - with pg_client.PostgresClient() as cur: - cur.execute( - cur.mogrify( - f"""\ - SELECT ARRAY_AGG(DISTINCT s.user_os - ORDER BY s.user_os) FILTER ( WHERE s.user_os IS NOT NULL AND s.platform='web') AS {schemas.FilterType.user_os}, - ARRAY_AGG(DISTINCT s.user_browser - ORDER BY 
s.user_browser) - FILTER ( WHERE s.user_browser IS NOT NULL AND s.platform='web') AS {schemas.FilterType.user_browser}, - ARRAY_AGG(DISTINCT s.user_device - ORDER BY s.user_device) - FILTER ( WHERE s.user_device IS NOT NULL AND s.user_device != '' AND s.platform='web') AS {schemas.FilterType.user_device}, - ARRAY_AGG(DISTINCT s.user_country - ORDER BY s.user_country) - FILTER ( WHERE s.user_country IS NOT NULL AND s.platform='web')::text[] AS {schemas.FilterType.user_country}, - ARRAY_AGG(DISTINCT s.user_id - ORDER BY s.user_id) FILTER ( WHERE s.user_id IS NOT NULL AND s.user_id != 'none' AND s.user_id != '' AND s.platform='web') AS {schemas.FilterType.user_id}, - ARRAY_AGG(DISTINCT s.user_anonymous_id - ORDER BY s.user_anonymous_id) FILTER ( WHERE s.user_anonymous_id IS NOT NULL AND s.user_anonymous_id != 'none' AND s.user_anonymous_id != '' AND s.platform='web') AS {schemas.FilterType.user_anonymous_id}, - ARRAY_AGG(DISTINCT s.rev_id - ORDER BY s.rev_id) FILTER ( WHERE s.rev_id IS NOT NULL AND s.platform='web') AS {schemas.FilterType.rev_id}, - ARRAY_AGG(DISTINCT p.referrer - ORDER BY p.referrer) - FILTER ( WHERE p.referrer != '' ) AS {schemas.FilterType.referrer}, - - ARRAY_AGG(DISTINCT s.utm_source - ORDER BY s.utm_source) FILTER ( WHERE s.utm_source IS NOT NULL AND s.utm_source != 'none' AND s.utm_source != '') AS {schemas.FilterType.utm_source}, - ARRAY_AGG(DISTINCT s.utm_medium - ORDER BY s.utm_medium) FILTER ( WHERE s.utm_medium IS NOT NULL AND s.utm_medium != 'none' AND s.utm_medium != '') AS {schemas.FilterType.utm_medium}, - ARRAY_AGG(DISTINCT s.utm_campaign - ORDER BY s.utm_campaign) FILTER ( WHERE s.utm_campaign IS NOT NULL AND s.utm_campaign != 'none' AND s.utm_campaign != '') AS {schemas.FilterType.utm_campaign}, - - ARRAY_AGG(DISTINCT s.user_os - ORDER BY s.user_os) FILTER ( WHERE s.user_os IS NOT NULL AND s.platform='ios' ) AS {schemas.FilterType.user_os_ios}, - ARRAY_AGG(DISTINCT s.user_device - ORDER BY s.user_device) - FILTER ( WHERE 
s.user_device IS NOT NULL AND s.user_device != '' AND s.platform='ios') AS {schemas.FilterType.user_device_ios}, - ARRAY_AGG(DISTINCT s.user_country - ORDER BY s.user_country) - FILTER ( WHERE s.user_country IS NOT NULL AND s.platform='ios')::text[] AS {schemas.FilterType.user_country_ios}, - ARRAY_AGG(DISTINCT s.user_id - ORDER BY s.user_id) FILTER ( WHERE s.user_id IS NOT NULL AND s.user_id != 'none' AND s.user_id != '' AND s.platform='ios') AS {schemas.FilterType.user_id_ios}, - ARRAY_AGG(DISTINCT s.user_anonymous_id - ORDER BY s.user_anonymous_id) FILTER ( WHERE s.user_anonymous_id IS NOT NULL AND s.user_anonymous_id != 'none' AND s.user_anonymous_id != '' AND s.platform='ios') AS {schemas.FilterType.user_anonymous_id_ios}, - ARRAY_AGG(DISTINCT s.rev_id - ORDER BY s.rev_id) FILTER ( WHERE s.rev_id IS NOT NULL AND s.platform='ios') AS {schemas.FilterType.rev_id_ios} - FROM public.sessions AS s - LEFT JOIN events.pages AS p USING (session_id) - WHERE s.project_id = %(site_id)s;""", - {"site_id": project_id} - ) - ) - - row = cur.fetchone() - for k in row.keys(): - if row[k] is None: - row[k] = [] - elif len(row[k]) > 500: - row[k] = row[k][:500] - return helper.dict_to_CAPITAL_keys(row) - - -def get_top_key_values(project_id): - with pg_client.PostgresClient() as cur: - cur.execute( - cur.mogrify( - f"""\ - SELECT {",".join([f"ARRAY((SELECT value FROM public.autocomplete WHERE project_id = %(site_id)s AND type='{k}' GROUP BY value ORDER BY COUNT(*) DESC LIMIT %(limit)s)) AS {k}" for k in SUPPORTED_TYPES.keys()])};""", - {"site_id": project_id, "limit": 5} - ) - ) - - row = cur.fetchone() - return helper.dict_to_CAPITAL_keys(row) - - -def __generic_query(typename, value_length=None): - if value_length is None or value_length > 2: - return f""" (SELECT DISTINCT value, type - FROM public.autocomplete - WHERE - project_id = %(project_id)s - AND type ='{typename}' - AND value ILIKE %(svalue)s - ORDER BY value - LIMIT 5) - UNION - (SELECT DISTINCT value, type - FROM 
public.autocomplete - WHERE - project_id = %(project_id)s - AND type ='{typename}' - AND value ILIKE %(value)s - ORDER BY value - LIMIT 5);""" - return f""" SELECT DISTINCT value, type - FROM public.autocomplete - WHERE - project_id = %(project_id)s - AND type ='{typename}' - AND value ILIKE %(svalue)s - ORDER BY value - LIMIT 10;""" - - -def __generic_autocomplete(typename): - def f(project_id, text): - with pg_client.PostgresClient() as cur: - query = cur.mogrify(__generic_query(typename, - value_length=len(text) \ - if SUPPORTED_TYPES[typename].change_by_length else None), - {"project_id": project_id, "value": helper.string_to_sql_like(text), - "svalue": helper.string_to_sql_like("^" + text)}) - - cur.execute(query) - rows = cur.fetchall() - return rows - - return f - - SUPPORTED_TYPES = { schemas.FilterType.user_os: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.user_os), - query=__generic_query(typename=schemas.FilterType.user_os), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_os), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_os)), schemas.FilterType.user_browser: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.user_browser), - query=__generic_query(typename=schemas.FilterType.user_browser), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_browser), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_browser)), schemas.FilterType.user_device: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.user_device), - query=__generic_query(typename=schemas.FilterType.user_device), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_device), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_device)), 
schemas.FilterType.user_country: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.user_country), - query=__generic_query(typename=schemas.FilterType.user_country), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_country), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_country)), schemas.FilterType.user_id: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.user_id), - query=__generic_query(typename=schemas.FilterType.user_id), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_id), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_id)), schemas.FilterType.user_anonymous_id: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.user_anonymous_id), - query=__generic_query(typename=schemas.FilterType.user_anonymous_id), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_anonymous_id), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_anonymous_id)), schemas.FilterType.rev_id: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.rev_id), - query=__generic_query(typename=schemas.FilterType.rev_id), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.rev_id), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.rev_id)), schemas.FilterType.referrer: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.referrer), - query=__generic_query(typename=schemas.FilterType.referrer), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.referrer), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.referrer)), schemas.FilterType.utm_campaign: SupportedFilter( 
- get=__generic_autocomplete(typename=schemas.FilterType.utm_campaign), - query=__generic_query(typename=schemas.FilterType.utm_campaign), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.utm_campaign), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.utm_campaign)), schemas.FilterType.utm_medium: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.utm_medium), - query=__generic_query(typename=schemas.FilterType.utm_medium), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.utm_medium), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.utm_medium)), schemas.FilterType.utm_source: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.utm_source), - query=__generic_query(typename=schemas.FilterType.utm_source), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.utm_source), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.utm_source)), # IOS schemas.FilterType.user_os_ios: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.user_os_ios), - query=__generic_query(typename=schemas.FilterType.user_os_ios), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_os_ios), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_os_ios)), schemas.FilterType.user_device_ios: SupportedFilter( - get=__generic_autocomplete( + get=autocomplete.__generic_autocomplete_metas( typename=schemas.FilterType.user_device_ios), - query=__generic_query(typename=schemas.FilterType.user_device_ios), - change_by_length=True), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_device_ios)), schemas.FilterType.user_country_ios: SupportedFilter( - 
get=__generic_autocomplete(typename=schemas.FilterType.user_country_ios), - query=__generic_query(typename=schemas.FilterType.user_country_ios), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_country_ios), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_country_ios)), schemas.FilterType.user_id_ios: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.user_id_ios), - query=__generic_query(typename=schemas.FilterType.user_id_ios), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_id_ios), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_id_ios)), schemas.FilterType.user_anonymous_id_ios: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.user_anonymous_id_ios), - query=__generic_query(typename=schemas.FilterType.user_anonymous_id_ios), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_anonymous_id_ios), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.user_anonymous_id_ios)), schemas.FilterType.rev_id_ios: SupportedFilter( - get=__generic_autocomplete(typename=schemas.FilterType.rev_id_ios), - query=__generic_query(typename=schemas.FilterType.rev_id_ios), - change_by_length=True), + get=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.rev_id_ios), + query=autocomplete.__generic_autocomplete_metas(typename=schemas.FilterType.rev_id_ios)), } -def search(text, meta_type, project_id): +def search(text: str, meta_type: schemas.FilterType, project_id: int): rows = [] if meta_type not in list(SUPPORTED_TYPES.keys()): return {"errors": ["unsupported type"]} diff --git a/api/chalicelib/core/sessions_viewed.py b/api/chalicelib/core/sessions_viewed.py new file mode 100644 index 000000000..c9b2c9b46 --- /dev/null +++ 
b/api/chalicelib/core/sessions_viewed.py @@ -0,0 +1,11 @@ +from chalicelib.utils import pg_client + + +def view_session(project_id, user_id, session_id): + with pg_client.PostgresClient() as cur: + cur.execute( + cur.mogrify("""INSERT INTO public.user_viewed_sessions(user_id, session_id) + VALUES (%(userId)s,%(sessionId)s) + ON CONFLICT DO NOTHING;""", + {"userId": user_id, "sessionId": session_id}) + ) diff --git a/api/chalicelib/core/significance.py b/api/chalicelib/core/significance.py index 9bd0fa966..d6f46da70 100644 --- a/api/chalicelib/core/significance.py +++ b/api/chalicelib/core/significance.py @@ -559,8 +559,8 @@ def get_top_insights(filter_d, project_id): "dropDueToIssues": 0 }] - counts = sessions.search2_pg(data=schemas.SessionsSearchCountSchema.parse_obj(filter_d), project_id=project_id, - user_id=None, count_only=True) + counts = sessions.search_sessions(data=schemas.SessionsSearchCountSchema.parse_obj(filter_d), project_id=project_id, + user_id=None, count_only=True) output[0]["sessionsCount"] = counts["countSessions"] output[0]["usersCount"] = counts["countUsers"] return output, 0 diff --git a/api/chalicelib/core/signup.py b/api/chalicelib/core/signup.py index 146da7305..23c2c8744 100644 --- a/api/chalicelib/core/signup.py +++ b/api/chalicelib/core/signup.py @@ -45,7 +45,7 @@ def create_step1(data: schemas.UserSignupSchema): print("Verifying company's name validity") company_name = data.organizationName - if company_name is None or len(company_name) < 1 or not helper.is_alphanumeric_space(company_name): + if company_name is None or len(company_name) < 1: errors.append("invalid organization's name") print("Verifying project's name validity") diff --git a/api/chalicelib/core/users.py b/api/chalicelib/core/users.py index 1535534c8..6762d9a6f 100644 --- a/api/chalicelib/core/users.py +++ b/api/chalicelib/core/users.py @@ -181,7 +181,7 @@ def create_member(tenant_id, user_id, data, background_tasks: BackgroundTasks): if user: return {"errors": ["user 
already exists"]} name = data.get("name", None) - if name is not None and not helper.is_alphabet_latin_space(name): + if name is not None and len(name) == 0: return {"errors": ["invalid user name"]} if name is None: name = data["email"] diff --git a/api/chalicelib/utils/event_filter_definition.py b/api/chalicelib/utils/event_filter_definition.py index b21d49b9c..93b1b9d5f 100644 --- a/api/chalicelib/utils/event_filter_definition.py +++ b/api/chalicelib/utils/event_filter_definition.py @@ -6,7 +6,6 @@ class Event: class SupportedFilter: - def __init__(self, get, query, change_by_length): + def __init__(self, get, query): self.get = get self.query = query - self.change_by_length = change_by_length diff --git a/api/requirements-alerts.txt b/api/requirements-alerts.txt index 788c58767..fc141eb09 100644 --- a/api/requirements-alerts.txt +++ b/api/requirements-alerts.txt @@ -1,15 +1,15 @@ requests==2.28.1 urllib3==1.26.10 -boto3==1.24.26 +boto3==1.24.53 pyjwt==2.4.0 psycopg2-binary==2.9.3 -elasticsearch==8.3.1 +elasticsearch==8.3.3 jira==3.3.1 -fastapi==0.78.0 +fastapi==0.80.0 uvicorn[standard]==0.18.2 python-decouple==3.6 -pydantic[email]==1.9.1 +pydantic[email]==1.9.2 apscheduler==3.9.1 \ No newline at end of file diff --git a/api/requirements.txt b/api/requirements.txt index 788c58767..fc141eb09 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -1,15 +1,15 @@ requests==2.28.1 urllib3==1.26.10 -boto3==1.24.26 +boto3==1.24.53 pyjwt==2.4.0 psycopg2-binary==2.9.3 -elasticsearch==8.3.1 +elasticsearch==8.3.3 jira==3.3.1 -fastapi==0.78.0 +fastapi==0.80.0 uvicorn[standard]==0.18.2 python-decouple==3.6 -pydantic[email]==1.9.1 +pydantic[email]==1.9.2 apscheduler==3.9.1 \ No newline at end of file diff --git a/api/routers/core.py b/api/routers/core.py index 2bc4a4dd4..35f27e248 100644 --- a/api/routers/core.py +++ b/api/routers/core.py @@ -1,4 +1,4 @@ -from typing import Union, Optional +from typing import Union from decouple import config from fastapi import 
Depends, Body, BackgroundTasks, HTTPException @@ -7,13 +7,13 @@ from starlette import status import schemas from chalicelib.core import log_tool_rollbar, sourcemaps, events, sessions_assignments, projects, \ - sessions_metas, alerts, funnels, issues, integrations_manager, metadata, \ + alerts, funnels, issues, integrations_manager, metadata, \ log_tool_elasticsearch, log_tool_datadog, \ - log_tool_stackdriver, reset_password, sessions_favorite_viewed, \ + log_tool_stackdriver, reset_password, sessions_favorite, \ log_tool_cloudwatch, log_tool_sentry, log_tool_sumologic, log_tools, errors, sessions, \ log_tool_newrelic, announcements, log_tool_bugsnag, weekly_report, integration_jira_cloud, integration_github, \ - assist, heatmaps, mobile, signup, tenants, errors_favorite_viewed, boarding, notifications, webhook, users, \ - custom_metrics, saved_search, integrations_global + assist, heatmaps, mobile, signup, tenants, errors_viewed, boarding, notifications, webhook, users, \ + custom_metrics, saved_search, integrations_global, sessions_viewed, errors_favorite from chalicelib.core.collaboration_slack import Slack from chalicelib.utils import email_helper, helper, captcha from chalicelib.utils.TimeUTC import TimeUTC @@ -51,6 +51,14 @@ def login(data: schemas.UserLoginSchema = Body(...)): } +@app.post('/{projectId}/sessions/search', tags=["sessions"]) +@app.post('/{projectId}/sessions/search2', tags=["sessions"]) +def sessions_search(projectId: int, data: schemas.FlatSessionsSearchPayloadSchema = Body(...), + context: schemas.CurrentContext = Depends(OR_context)): + data = sessions.search_sessions(data=data, project_id=projectId, user_id=context.user_id) + return {'data': data} + + @app.get('/{projectId}/sessions/{sessionId}', tags=["sessions"]) @app.get('/{projectId}/sessions2/{sessionId}', tags=["sessions"]) def get_session2(projectId: int, sessionId: Union[int, str], background_tasks: BackgroundTasks, @@ -62,7 +70,7 @@ def get_session2(projectId: int, sessionId: 
Union[int, str], background_tasks: B if data is None: return {"errors": ["session not found"]} if data.get("inDB"): - background_tasks.add_task(sessions_favorite_viewed.view_session, project_id=projectId, user_id=context.user_id, + background_tasks.add_task(sessions_viewed.view_session, project_id=projectId, user_id=context.user_id, session_id=sessionId) return { 'data': data @@ -74,8 +82,8 @@ def get_session2(projectId: int, sessionId: Union[int, str], background_tasks: B def add_remove_favorite_session2(projectId: int, sessionId: int, context: schemas.CurrentContext = Depends(OR_context)): return { - "data": sessions_favorite_viewed.favorite_session(project_id=projectId, user_id=context.user_id, - session_id=sessionId)} + "data": sessions_favorite.favorite_session(project_id=projectId, user_id=context.user_id, + session_id=sessionId)} @app.get('/{projectId}/sessions/{sessionId}/assign', tags=["sessions"]) @@ -164,23 +172,6 @@ def events_search(projectId: int, q: str, return result -@app.post('/{projectId}/sessions/search2', tags=["sessions"]) -def sessions_search2(projectId: int, data: schemas.FlatSessionsSearchPayloadSchema = Body(...), - context: schemas.CurrentContext = Depends(OR_context)): - data = sessions.search2_pg(data=data, project_id=projectId, user_id=context.user_id) - return {'data': data} - - -@app.get('/{projectId}/sessions/filters', tags=["sessions"]) -def session_filter_values(projectId: int, context: schemas.CurrentContext = Depends(OR_context)): - return {'data': sessions_metas.get_key_values(projectId)} - - -@app.get('/{projectId}/sessions/filters/top', tags=["sessions"]) -def session_top_filter_values(projectId: int, context: schemas.CurrentContext = Depends(OR_context)): - return {'data': sessions_metas.get_top_key_values(projectId)} - - @app.get('/{projectId}/integrations', tags=["integrations"]) def get_integrations_status(projectId: int, context: schemas.CurrentContext = Depends(OR_context)): data = 
integrations_global.get_global_integrations_status(tenant_id=context.tenant_id, @@ -909,7 +900,7 @@ def get_live_session(projectId: int, sessionId: str, background_tasks: Backgroun if data is None: return {"errors": ["session not found"]} if data.get("inDB"): - background_tasks.add_task(sessions_favorite_viewed.view_session, project_id=projectId, + background_tasks.add_task(sessions_viewed.view_session, project_id=projectId, user_id=context.user_id, session_id=sessionId) return {'data': data} @@ -995,7 +986,7 @@ def errors_get_details(projectId: int, errorId: str, background_tasks: Backgroun data = errors.get_details(project_id=projectId, user_id=context.user_id, error_id=errorId, **{"density24": density24, "density30": density30}) if data.get("data") is not None: - background_tasks.add_task(errors_favorite_viewed.viewed_error, project_id=projectId, user_id=context.user_id, + background_tasks.add_task(errors_viewed.viewed_error, project_id=projectId, user_id=context.user_id, error_id=errorId) return data @@ -1024,7 +1015,7 @@ def errors_get_details_sourcemaps(projectId: int, errorId: str, def add_remove_favorite_error(projectId: int, errorId: str, action: str, startDate: int = TimeUTC.now(-7), endDate: int = TimeUTC.now(), context: schemas.CurrentContext = Depends(OR_context)): if action == "favorite": - return errors_favorite_viewed.favorite_error(project_id=projectId, user_id=context.user_id, error_id=errorId) + return errors_favorite.favorite_error(project_id=projectId, user_id=context.user_id, error_id=errorId) elif action == "sessions": start_date = startDate end_date = endDate diff --git a/api/routers/core_dynamic.py b/api/routers/core_dynamic.py index 73dad85bb..d37a56728 100644 --- a/api/routers/core_dynamic.py +++ b/api/routers/core_dynamic.py @@ -7,7 +7,7 @@ from starlette.responses import RedirectResponse import schemas from chalicelib.core import integrations_manager from chalicelib.core import sessions -from chalicelib.core import tenants, users, 
metadata, projects, license +from chalicelib.core import tenants, users, projects, license from chalicelib.core import webhook from chalicelib.core.collaboration_slack import Slack from chalicelib.utils import helper diff --git a/api/schemas.py b/api/schemas.py index b87f5e4cd..591d8e905 100644 --- a/api/schemas.py +++ b/api/schemas.py @@ -554,13 +554,15 @@ class _SessionSearchEventRaw(__MixedSearchFilter): assert values.get("sourceOperator") is not None, \ "sourceOperator should not be null for PerformanceEventType" if values["type"] == PerformanceEventType.time_between_events: + assert values["sourceOperator"] != MathOperator._equal.value, \ + f"{MathOperator._equal} is not allowed for duration of {PerformanceEventType.time_between_events}" assert len(values.get("value", [])) == 2, \ f"must provide 2 Events as value for {PerformanceEventType.time_between_events}" assert isinstance(values["value"][0], _SessionSearchEventRaw) \ and isinstance(values["value"][1], _SessionSearchEventRaw), \ f"event should be of type _SessionSearchEventRaw for {PerformanceEventType.time_between_events}" assert len(values["source"]) > 0 and isinstance(values["source"][0], int), \ - f"source of type int if required for {PerformanceEventType.time_between_events}" + f"source of type int is required for {PerformanceEventType.time_between_events}" else: assert "source" in values, f"source is required for {values.get('type')}" assert isinstance(values["source"], list), f"source of type list is required for {values.get('type')}" @@ -736,7 +738,7 @@ class ErrorSort(str, Enum): sessions_count = 'sessions' -class SearchErrorsSchema(SessionsSearchPayloadSchema): +class SearchErrorsSchema(FlatSessionsSearchPayloadSchema): sort: ErrorSort = Field(default=ErrorSort.occurrence) density: Optional[int] = Field(7) status: Optional[ErrorStatus] = Field(default=ErrorStatus.all) @@ -768,7 +770,7 @@ class MobileSignPayloadSchema(BaseModel): keys: List[str] = Field(...) 
-class CustomMetricSeriesFilterSchema(FlatSessionsSearchPayloadSchema, SearchErrorsSchema): +class CustomMetricSeriesFilterSchema(SearchErrorsSchema): startDate: Optional[int] = Field(None) endDate: Optional[int] = Field(None) sort: Optional[str] = Field(None) diff --git a/ee/api/.gitignore b/ee/api/.gitignore index 12a468ef1..59d7202a9 100644 --- a/ee/api/.gitignore +++ b/ee/api/.gitignore @@ -177,11 +177,15 @@ chalicelib/saas README/* Pipfile +.local/* + /chalicelib/core/alerts.py /chalicelib/core/alerts_processor.py /chalicelib/core/announcements.py +/chalicelib/core/autocomplete.py /chalicelib/core/collaboration_slack.py -/chalicelib/core/errors_favorite_viewed.py +/chalicelib/core/errors.py +/chalicelib/core/errors_favorite.py /chalicelib/core/events.py /chalicelib/core/events_ios.py /chalicelib/core/funnels.py @@ -257,4 +261,4 @@ Pipfile /build_alerts.sh /routers/subs/metrics.py /routers/subs/v1_api.py -/chalicelib/core/dashboards.py \ No newline at end of file +/chalicelib/core/dashboards.py diff --git a/ee/api/auth/auth_project.py b/ee/api/auth/auth_project.py index c1e1d38cd..2c78041e0 100644 --- a/ee/api/auth/auth_project.py +++ b/ee/api/auth/auth_project.py @@ -15,13 +15,15 @@ class ProjectAuthorizer: if len(request.path_params.keys()) == 0 or request.path_params.get(self.project_identifier) is None: return current_user: schemas.CurrentContext = await OR_context(request) - project_identifier = request.path_params[self.project_identifier] + value = request.path_params[self.project_identifier] user_id = current_user.user_id if request.state.authorizer_identity == "jwt" else None if (self.project_identifier == "projectId" \ - and not projects.is_authorized(project_id=project_identifier, tenant_id=current_user.tenant_id, + and not projects.is_authorized(project_id=value, tenant_id=current_user.tenant_id, user_id=user_id)) \ - or (self.project_identifier.lower() == "projectKey" \ - and not 
projects.is_authorized(project_id=projects.get_internal_project_id(project_identifier), - tenant_id=current_user.tenant_id, user_id=user_id)): + or (self.project_identifier == "projectKey" \ + and not projects.is_authorized( + project_id=projects.get_internal_project_id(value), + tenant_id=current_user.tenant_id, user_id=user_id)): print("unauthorized project") + print(value) raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="unauthorized project.") diff --git a/ee/api/chalicelib/core/__init__.py b/ee/api/chalicelib/core/__init__.py index e69de29bb..ffccbb1a4 100644 --- a/ee/api/chalicelib/core/__init__.py +++ b/ee/api/chalicelib/core/__init__.py @@ -0,0 +1,28 @@ +from decouple import config +import logging + +logging.basicConfig(level=config("LOGLEVEL", default=logging.INFO)) + +if config("EXP_SESSIONS_SEARCH", cast=bool, default=False): + print(">>> Using experimental sessions search") + from . import sessions_exp as sessions +else: + from . import sessions as sessions + +if config("EXP_AUTOCOMPLETE", cast=bool, default=False): + print(">>> Using experimental autocomplete") + from . import autocomplete_exp as autocomplete +else: + from . import autocomplete as autocomplete + +if config("EXP_ERRORS_SEARCH", cast=bool, default=False): + print(">>> Using experimental error search") + from . import errors_exp as errors +else: + from . import errors as errors + +if config("EXP_METRICS", cast=bool, default=False): + print(">>> Using experimental metrics") + from . import metrics_exp as metrics +else: + from . 
import metrics as metrics diff --git a/ee/api/chalicelib/core/autocomplete_exp.py b/ee/api/chalicelib/core/autocomplete_exp.py new file mode 100644 index 000000000..db2ecb95b --- /dev/null +++ b/ee/api/chalicelib/core/autocomplete_exp.py @@ -0,0 +1,107 @@ +import schemas +from chalicelib.utils import ch_client +from chalicelib.utils import helper +from chalicelib.utils.event_filter_definition import Event + +TABLE = "final.autocomplete" + + +def __get_autocomplete_table(value, project_id): + autocomplete_events = [schemas.FilterType.rev_id, + schemas.EventType.click, + schemas.FilterType.user_device, + schemas.FilterType.user_id, + schemas.FilterType.user_browser, + schemas.FilterType.user_os, + schemas.EventType.custom, + schemas.FilterType.user_country, + schemas.EventType.location, + schemas.EventType.input] + autocomplete_events.sort() + sub_queries = [] + for e in autocomplete_events: + sub_queries.append(f"""(SELECT type, value + FROM {TABLE} + WHERE project_id = %(project_id)s + AND type= '{e}' + AND value ILIKE %(svalue)s + ORDER BY value + LIMIT 5)""") + if len(value) > 2: + sub_queries.append(f"""(SELECT type, value + FROM {TABLE} + WHERE project_id = %(project_id)s + AND type= '{e}' + AND value ILIKE %(value)s + ORDER BY value + LIMIT 5)""") + with ch_client.ClickHouseClient() as cur: + query = " UNION DISTINCT ".join(sub_queries) + ";" + params = {"project_id": project_id, "value": helper.string_to_sql_like(value), + "svalue": helper.string_to_sql_like("^" + value)} + results = [] + try: + results = cur.execute(query=query, params=params) + except Exception as err: + print("--------- CH AUTOCOMPLETE SEARCH QUERY EXCEPTION -----------") + print(cur.format(query=query, params=params)) + print("--------- PARAMS -----------") + print(params) + print("--------- VALUE -----------") + print(value) + print("--------------------") + raise err + return results + + +def __generic_query(typename, value_length=None): + if value_length is None or value_length > 2: + 
return f"""(SELECT DISTINCT value, type + FROM {TABLE} + WHERE + project_id = %(project_id)s + AND type='{typename}' + AND value ILIKE %(svalue)s + ORDER BY value + LIMIT 5) + UNION DISTINCT + (SELECT DISTINCT value, type + FROM {TABLE} + WHERE + project_id = %(project_id)s + AND type='{typename}' + AND value ILIKE %(value)s + ORDER BY value + LIMIT 5);""" + return f"""SELECT DISTINCT value, type + FROM {TABLE} + WHERE + project_id = %(project_id)s + AND type='{typename}' + AND value ILIKE %(svalue)s + ORDER BY value + LIMIT 10;""" + + +def __generic_autocomplete(event: Event): + def f(project_id, value, key=None, source=None): + with ch_client.ClickHouseClient() as cur: + query = __generic_query(event.ui_type, value_length=len(value)) + params = {"project_id": project_id, "value": helper.string_to_sql_like(value), + "svalue": helper.string_to_sql_like("^" + value)} + results = cur.execute(query=query, params=params) + return helper.list_to_camel_case(results) + + return f + + +def __generic_autocomplete_metas(typename): + def f(project_id, text): + with ch_client.ClickHouseClient() as cur: + query = __generic_query(typename, value_length=len(text)) + params = {"project_id": project_id, "value": helper.string_to_sql_like(text), + "svalue": helper.string_to_sql_like("^" + text)} + results = cur.execute(query=query, params=params) + return results + + return f diff --git a/ee/api/chalicelib/core/errors.py b/ee/api/chalicelib/core/errors_exp.py similarity index 68% rename from ee/api/chalicelib/core/errors.py rename to ee/api/chalicelib/core/errors_exp.py index 07a5e10ba..d9fa8f920 100644 --- a/ee/api/chalicelib/core/errors.py +++ b/ee/api/chalicelib/core/errors_exp.py @@ -1,13 +1,59 @@ import json import schemas -from chalicelib.core import metrics +from chalicelib.core import metrics, metadata from chalicelib.core import sourcemaps, sessions from chalicelib.utils import ch_client, metrics_helper from chalicelib.utils import pg_client, helper from 
chalicelib.utils.TimeUTC import TimeUTC +def _multiple_values(values, value_key="value"): + query_values = {} + if values is not None and isinstance(values, list): + for i in range(len(values)): + k = f"{value_key}_{i}" + query_values[k] = values[i] + return query_values + + +def __get_sql_operator(op: schemas.SearchEventOperator): + return { + schemas.SearchEventOperator._is: "=", + schemas.SearchEventOperator._is_any: "IN", + schemas.SearchEventOperator._on: "=", + schemas.SearchEventOperator._on_any: "IN", + schemas.SearchEventOperator._is_not: "!=", + schemas.SearchEventOperator._not_on: "!=", + schemas.SearchEventOperator._contains: "ILIKE", + schemas.SearchEventOperator._not_contains: "NOT ILIKE", + schemas.SearchEventOperator._starts_with: "ILIKE", + schemas.SearchEventOperator._ends_with: "ILIKE", + }.get(op, "=") + + +def _isAny_opreator(op: schemas.SearchEventOperator): + return op in [schemas.SearchEventOperator._on_any, schemas.SearchEventOperator._is_any] + + +def _isUndefined_operator(op: schemas.SearchEventOperator): + return op in [schemas.SearchEventOperator._is_undefined] + + +def __is_negation_operator(op: schemas.SearchEventOperator): + return op in [schemas.SearchEventOperator._is_not, + schemas.SearchEventOperator._not_on, + schemas.SearchEventOperator._not_contains] + + +def _multiple_conditions(condition, values, value_key="value", is_not=False): + query = [] + for i in range(len(values)): + k = f"{value_key}_{i}" + query.append(condition.replace(value_key, k)) + return "(" + (" AND " if is_not else " OR ").join(query) + ")" + + def get(error_id, family=False): if family: return get_batch([error_id]) @@ -263,10 +309,7 @@ def get_details(project_id, error_id, user_id, **data): parent_error_id,session_id, user_anonymous_id, user_id, user_uuid, user_browser, user_browser_version, user_os, user_os_version, user_device, payload, - COALESCE((SELECT TRUE - FROM public.user_favorite_errors AS fe - WHERE pe.error_id = fe.error_id - AND fe.user_id = 
%(userId)s), FALSE) AS favorite, + FALSE AS favorite, True AS viewed FROM public.errors AS pe INNER JOIN events.errors AS ee USING (error_id) @@ -420,8 +463,10 @@ def get_details_chart(project_id, error_id, user_id, **data): def __get_basic_constraints(platform=None, time_constraint=True, startTime_arg_name="startDate", - endTime_arg_name="endDate"): + endTime_arg_name="endDate", type_condition=True): ch_sub_query = ["project_id =toUInt32(%(project_id)s)"] + if type_condition: + ch_sub_query.append("event_type='ERROR'") if time_constraint: ch_sub_query += [f"datetime >= toDateTime(%({startTime_arg_name})s/1000)", f"datetime < toDateTime(%({endTime_arg_name})s/1000)"] @@ -465,214 +510,217 @@ def __get_basic_constraints_pg(platform=None, time_constraint=True, startTime_ar return ch_sub_query -def search(data: schemas.SearchErrorsSchema, project_id, user_id, flows=False): - empty_response = {'total': 0, - 'errors': [] - } +def search(data: schemas.SearchErrorsSchema, project_id, user_id): + MAIN_EVENTS_TABLE = "final.events" + MAIN_SESSIONS_TABLE = "final.sessions" + if data.startDate >= TimeUTC.now(delta_days=-7): + MAIN_EVENTS_TABLE = "final.events_l7d_mv" + MAIN_SESSIONS_TABLE = "final.sessions_l7d_mv" platform = None for f in data.filters: if f.type == schemas.FilterType.platform and len(f.value) > 0: platform = f.value[0] - pg_sub_query = __get_basic_constraints_pg(platform, project_key="sessions.project_id") - pg_sub_query += ["sessions.start_ts>=%(startDate)s", "sessions.start_ts<%(endDate)s", "source ='js_exception'", - "pe.project_id=%(project_id)s"] - # To ignore Script error - pg_sub_query.append("pe.message!='Script error.'") - pg_sub_query_chart = __get_basic_constraints_pg(platform, time_constraint=False, chart=True, project_key=None) - # pg_sub_query_chart.append("source ='js_exception'") - pg_sub_query_chart.append("errors.error_id =details.error_id") - statuses = [] - error_ids = None - if data.startDate is None: - data.startDate = TimeUTC.now(-30) - 
if data.endDate is None: - data.endDate = TimeUTC.now(1) - if len(data.events) > 0 or len(data.filters) > 0: - print("-- searching for sessions before errors") - # if favorite_only=True search for sessions associated with favorite_error - statuses = sessions.search2_pg(data=data, project_id=project_id, user_id=user_id, errors_only=True, - error_status=data.status) - if len(statuses) == 0: - return empty_response - error_ids = [e["errorId"] for e in statuses] - with pg_client.PostgresClient() as cur: - if data.startDate is None: - data.startDate = TimeUTC.now(-7) - if data.endDate is None: - data.endDate = TimeUTC.now() - step_size = metrics_helper.__get_step_size(data.startDate, data.endDate, data.density, factor=1) - sort = __get_sort_key('datetime') - if data.sort is not None: - sort = __get_sort_key(data.sort) - order = "DESC" - if data.order is not None: - order = data.order - extra_join = "" - - params = { - "startDate": data.startDate, - "endDate": data.endDate, - "project_id": project_id, - "userId": user_id, - "step_size": step_size} - if data.status != schemas.ErrorStatus.all: - pg_sub_query.append("status = %(error_status)s") - params["error_status"] = data.status - if data.limit is not None and data.page is not None: - params["errors_offset"] = (data.page - 1) * data.limit - params["errors_limit"] = data.limit - else: - params["errors_offset"] = 0 - params["errors_limit"] = 200 - - if error_ids is not None: - params["error_ids"] = tuple(error_ids) - pg_sub_query.append("error_id IN %(error_ids)s") - if data.bookmarked: - pg_sub_query.append("ufe.user_id = %(userId)s") - extra_join += " INNER JOIN public.user_favorite_errors AS ufe USING (error_id)" - if data.query is not None and len(data.query) > 0: - pg_sub_query.append("(pe.name ILIKE %(error_query)s OR pe.message ILIKE %(error_query)s)") - params["error_query"] = helper.values_for_operator(value=data.query, - op=schemas.SearchEventOperator._contains) - - main_pg_query = f"""SELECT full_count, - 
error_id, - name, - message, - users, - sessions, - last_occurrence, - first_occurrence, - chart - FROM (SELECT COUNT(details) OVER () AS full_count, details.* - FROM (SELECT error_id, - name, - message, - COUNT(DISTINCT user_uuid) AS users, - COUNT(DISTINCT session_id) AS sessions, - MAX(timestamp) AS max_datetime, - MIN(timestamp) AS min_datetime - FROM events.errors - INNER JOIN public.errors AS pe USING (error_id) - INNER JOIN public.sessions USING (session_id) - {extra_join} - WHERE {" AND ".join(pg_sub_query)} - GROUP BY error_id, name, message - ORDER BY {sort} {order}) AS details - LIMIT %(errors_limit)s OFFSET %(errors_offset)s - ) AS details - INNER JOIN LATERAL (SELECT MAX(timestamp) AS last_occurrence, - MIN(timestamp) AS first_occurrence - FROM events.errors - WHERE errors.error_id = details.error_id) AS time_details ON (TRUE) - INNER JOIN LATERAL (SELECT jsonb_agg(chart_details) AS chart - FROM (SELECT generated_timestamp AS timestamp, - COUNT(session_id) AS count - FROM generate_series(%(startDate)s, %(endDate)s, %(step_size)s) AS generated_timestamp - LEFT JOIN LATERAL (SELECT DISTINCT session_id - FROM events.errors - WHERE {" AND ".join(pg_sub_query_chart)} - ) AS sessions ON (TRUE) - GROUP BY timestamp - ORDER BY timestamp) AS chart_details) AS chart_details ON (TRUE);""" - - # print("--------------------") - # print(cur.mogrify(main_pg_query, params)) - # print("--------------------") - - cur.execute(cur.mogrify(main_pg_query, params)) - rows = cur.fetchall() - total = 0 if len(rows) == 0 else rows[0]["full_count"] - if flows: - return {"count": total} - - if total == 0: - rows = [] - else: - if len(statuses) == 0: - query = cur.mogrify( - """SELECT error_id, status, parent_error_id, payload, - COALESCE((SELECT TRUE - FROM public.user_favorite_errors AS fe - WHERE errors.error_id = fe.error_id - AND fe.user_id = %(user_id)s LIMIT 1), FALSE) AS favorite, - COALESCE((SELECT TRUE - FROM public.user_viewed_errors AS ve - WHERE errors.error_id = 
ve.error_id - AND ve.user_id = %(user_id)s LIMIT 1), FALSE) AS viewed - FROM public.errors - WHERE project_id = %(project_id)s AND error_id IN %(error_ids)s;""", - {"project_id": project_id, "error_ids": tuple([r["error_id"] for r in rows]), - "user_id": user_id}) - cur.execute(query=query) - statuses = helper.list_to_camel_case(cur.fetchall()) - statuses = { - s["errorId"]: s for s in statuses - } - - for r in rows: - r.pop("full_count") - if r["error_id"] in statuses: - r["status"] = statuses[r["error_id"]]["status"] - r["parent_error_id"] = statuses[r["error_id"]]["parentErrorId"] - r["favorite"] = statuses[r["error_id"]]["favorite"] - r["viewed"] = statuses[r["error_id"]]["viewed"] - r["stack"] = format_first_stack_frame(statuses[r["error_id"]])["stack"] - else: - r["status"] = "untracked" - r["parent_error_id"] = None - r["favorite"] = False - r["viewed"] = False - r["stack"] = None - - offset = len(rows) - rows = [r for r in rows if r["stack"] is None - or (len(r["stack"]) == 0 or len(r["stack"]) > 1 - or len(r["stack"]) > 0 - and (r["message"].lower() != "script error." 
or len(r["stack"][0]["absPath"]) > 0))] - offset -= len(rows) - return { - 'total': total - offset, - 'errors': helper.list_to_camel_case(rows) - } - - -# refactor this function after clickhouse structure changes (missing search by query) -def search_deprecated(data: schemas.SearchErrorsSchema, project_id, user_id, flows=False): - empty_response = {"data": { - 'total': 0, - 'errors': [] - }} - platform = None - for f in data.filters: - if f.type == schemas.FilterType.platform and len(f.value) > 0: - platform = f.value[0] - ch_sub_query = __get_basic_constraints(platform) + ch_sessions_sub_query = __get_basic_constraints(platform, type_condition=False) + ch_sub_query = __get_basic_constraints(platform, type_condition=True) ch_sub_query.append("source ='js_exception'") # To ignore Script error ch_sub_query.append("message!='Script error.'") - statuses = [] error_ids = None - # Clickhouse keeps data for the past month only, so no need to search beyond that - if data.startDate is None or data.startDate < TimeUTC.now(delta_days=-31): - data.startDate = TimeUTC.now(-30) + + if data.startDate is None: + data.startDate = TimeUTC.now(-7) if data.endDate is None: data.endDate = TimeUTC.now(1) - if len(data.events) > 0 or len(data.filters) > 0 or data.status != schemas.ErrorStatus.all: - print("-- searching for sessions before errors") - # if favorite_only=True search for sessions associated with favorite_error - statuses = sessions.search2_pg(data=data, project_id=project_id, user_id=user_id, errors_only=True, - error_status=data.status) - if len(statuses) == 0: - return empty_response - error_ids = [e["errorId"] for e in statuses] - with ch_client.ClickHouseClient() as ch, pg_client.PostgresClient() as cur: - if data.startDate is None: - data.startDate = TimeUTC.now(-7) - if data.endDate is None: - data.endDate = TimeUTC.now() + + subquery_part = "" + params = {} + if len(data.events) > 0: + errors_condition_count = 0 + for i, e in enumerate(data.events): + if e.type == 
schemas.EventType.error: + errors_condition_count += 1 + is_any = _isAny_opreator(e.operator) + op = __get_sql_operator(e.operator) + e_k = f"e_value{i}" + params = {**params, **_multiple_values(e.value, value_key=e_k)} + if not is_any and e.value not in [None, "*", ""]: + ch_sub_query.append( + _multiple_conditions(f"(message {op} %({e_k})s OR name {op} %({e_k})s)", + e.value, value_key=e_k)) + if len(data.events) > errors_condition_count: + print("----------Sessions conditions") + subquery_part_args, subquery_part = sessions.search_query_parts_ch(data=data, error_status=data.status, + errors_only=True, + project_id=project_id, user_id=user_id, + issue=None, + favorite_only=False) + subquery_part = f"INNER JOIN {subquery_part} USING(session_id)" + params = {**params, **subquery_part_args} + if len(data.filters) > 0: + meta_keys = None + # to reduce include a sub-query of sessions inside events query, in order to reduce the selected data + for i, f in enumerate(data.filters): + if not isinstance(f.value, list): + f.value = [f.value] + filter_type = f.type + f.value = helper.values_for_operator(value=f.value, op=f.operator) + f_k = f"f_value{i}" + params = {**params, f_k: f.value, **_multiple_values(f.value, value_key=f_k)} + op = __get_sql_operator(f.operator) \ + if filter_type not in [schemas.FilterType.events_count] else f.operator + is_any = _isAny_opreator(f.operator) + is_undefined = _isUndefined_operator(f.operator) + if not is_any and not is_undefined and len(f.value) == 0: + continue + is_not = False + if __is_negation_operator(f.operator): + is_not = True + if filter_type == schemas.FilterType.user_browser: + if is_any: + ch_sessions_sub_query.append('isNotNull(s.user_browser)') + else: + ch_sessions_sub_query.append( + _multiple_conditions(f's.user_browser {op} %({f_k})s', f.value, is_not=is_not, + value_key=f_k)) + + elif filter_type in [schemas.FilterType.user_os, schemas.FilterType.user_os_ios]: + if is_any: + 
ch_sessions_sub_query.append('isNotNull(s.user_os)') + else: + ch_sessions_sub_query.append( + _multiple_conditions(f's.user_os {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in [schemas.FilterType.user_device, schemas.FilterType.user_device_ios]: + if is_any: + ch_sessions_sub_query.append('isNotNull(s.user_device)') + else: + ch_sessions_sub_query.append( + _multiple_conditions(f's.user_device {op} %({f_k})s', f.value, is_not=is_not, + value_key=f_k)) + + elif filter_type in [schemas.FilterType.user_country, schemas.FilterType.user_country_ios]: + if is_any: + ch_sessions_sub_query.append('isNotNull(s.user_country)') + else: + ch_sessions_sub_query.append( + _multiple_conditions(f's.user_country {op} %({f_k})s', f.value, is_not=is_not, + value_key=f_k)) + + + elif filter_type in [schemas.FilterType.utm_source]: + if is_any: + ch_sessions_sub_query.append('isNotNull(s.utm_source)') + elif is_undefined: + ch_sessions_sub_query.append('isNull(s.utm_source)') + else: + ch_sessions_sub_query.append( + _multiple_conditions(f's.utm_source {op} toString(%({f_k})s)', f.value, is_not=is_not, + value_key=f_k)) + + elif filter_type in [schemas.FilterType.utm_medium]: + if is_any: + ch_sessions_sub_query.append('isNotNull(s.utm_medium)') + elif is_undefined: + ch_sessions_sub_query.append('isNull(s.utm_medium)') + else: + ch_sessions_sub_query.append( + _multiple_conditions(f's.utm_medium {op} toString(%({f_k})s)', f.value, is_not=is_not, + value_key=f_k)) + elif filter_type in [schemas.FilterType.utm_campaign]: + if is_any: + ch_sessions_sub_query.append('isNotNull(s.utm_campaign)') + elif is_undefined: + ch_sessions_sub_query.append('isNull(s.utm_campaign)') + else: + ch_sessions_sub_query.append( + _multiple_conditions(f's.utm_campaign {op} toString(%({f_k})s)', f.value, is_not=is_not, + value_key=f_k)) + + elif filter_type == schemas.FilterType.duration: + if len(f.value) > 0 and f.value[0] is not None: + 
ch_sessions_sub_query.append("s.duration >= %(minDuration)s") + params["minDuration"] = f.value[0] + if len(f.value) > 1 and f.value[1] is not None and int(f.value[1]) > 0: + ch_sessions_sub_query.append("s.duration <= %(maxDuration)s") + params["maxDuration"] = f.value[1] + + elif filter_type == schemas.FilterType.referrer: + # extra_from += f"INNER JOIN {events.event_type.LOCATION.table} AS p USING(session_id)" + if is_any: + referrer_constraint = 'isNotNull(s.base_referrer)' + else: + referrer_constraint = _multiple_conditions(f"s.base_referrer {op} %({f_k})s", f.value, + is_not=is_not, value_key=f_k) + elif filter_type == schemas.FilterType.metadata: + # get metadata list only if you need it + if meta_keys is None: + meta_keys = metadata.get(project_id=project_id) + meta_keys = {m["key"]: m["index"] for m in meta_keys} + if f.source in meta_keys.keys(): + if is_any: + ch_sessions_sub_query.append(f"isNotNull(s.{metadata.index_to_colname(meta_keys[f.source])})") + elif is_undefined: + ch_sessions_sub_query.append(f"isNull(s.{metadata.index_to_colname(meta_keys[f.source])})") + else: + ch_sessions_sub_query.append( + _multiple_conditions( + f"s.{metadata.index_to_colname(meta_keys[f.source])} {op} toString(%({f_k})s)", + f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: + if is_any: + ch_sessions_sub_query.append('isNotNull(s.user_id)') + elif is_undefined: + ch_sessions_sub_query.append('isNull(s.user_id)') + else: + ch_sessions_sub_query.append( + _multiple_conditions(f"s.user_id {op} toString(%({f_k})s)", f.value, is_not=is_not, + value_key=f_k)) + elif filter_type in [schemas.FilterType.user_anonymous_id, + schemas.FilterType.user_anonymous_id_ios]: + if is_any: + ch_sessions_sub_query.append('isNotNull(s.user_anonymous_id)') + elif is_undefined: + ch_sessions_sub_query.append('isNull(s.user_anonymous_id)') + else: + ch_sessions_sub_query.append( + 
_multiple_conditions(f"s.user_anonymous_id {op} toString(%({f_k})s)", f.value, + is_not=is_not, + value_key=f_k)) + + elif filter_type in [schemas.FilterType.rev_id, schemas.FilterType.rev_id_ios]: + if is_any: + ch_sessions_sub_query.append('isNotNull(s.rev_id)') + elif is_undefined: + ch_sessions_sub_query.append('isNull(s.rev_id)') + else: + ch_sessions_sub_query.append( + _multiple_conditions(f"s.rev_id {op} toString(%({f_k})s)", f.value, is_not=is_not, + value_key=f_k)) + + elif filter_type == schemas.FilterType.platform: + # op = __get_sql_operator(f.operator) + ch_sessions_sub_query.append( + _multiple_conditions(f"s.user_device_type {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + # elif filter_type == schemas.FilterType.issue: + # if is_any: + # ch_sessions_sub_query.append("notEmpty(s.issue_types)") + # else: + # ch_sessions_sub_query.append(f"hasAny(s.issue_types,%({f_k})s)") + # # _multiple_conditions(f"%({f_k})s {op} ANY (s.issue_types)", f.value, is_not=is_not, + # # value_key=f_k)) + # + # if is_not: + # extra_constraints[-1] = f"not({extra_constraints[-1]})" + # ss_constraints[-1] = f"not({ss_constraints[-1]})" + elif filter_type == schemas.FilterType.events_count: + ch_sessions_sub_query.append( + _multiple_conditions(f"s.events_count {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + + with ch_client.ClickHouseClient() as ch: step_size = __get_step_size(data.startDate, data.endDate, data.density) sort = __get_sort_key('datetime') if data.sort is not None: @@ -681,6 +729,7 @@ def search_deprecated(data: schemas.SearchErrorsSchema, project_id, user_id, flo if data.order is not None: order = data.order params = { + **params, "startDate": data.startDate, "endDate": data.endDate, "project_id": project_id, @@ -692,118 +741,82 @@ def search_deprecated(data: schemas.SearchErrorsSchema, project_id, user_id, flo else: params["errors_offset"] = 0 params["errors_limit"] = 200 - if data.bookmarked: - cur.execute(cur.mogrify(f"""SELECT 
error_id - FROM public.user_favorite_errors - WHERE user_id = %(userId)s - {"" if error_ids is None else "AND error_id IN %(error_ids)s"}""", - {"userId": user_id, "error_ids": tuple(error_ids or [])})) - error_ids = cur.fetchall() - if len(error_ids) == 0: - return empty_response - error_ids = [e["error_id"] for e in error_ids] + # if data.bookmarked: + # cur.execute(cur.mogrify(f"""SELECT error_id + # FROM public.user_favorite_errors + # WHERE user_id = %(userId)s + # {"" if error_ids is None else "AND error_id IN %(error_ids)s"}""", + # {"userId": user_id, "error_ids": tuple(error_ids or [])})) + # error_ids = cur.fetchall() + # if len(error_ids) == 0: + # return empty_response + # error_ids = [e["error_id"] for e in error_ids] if error_ids is not None: params["error_ids"] = tuple(error_ids) ch_sub_query.append("error_id IN %(error_ids)s") main_ch_query = f"""\ - SELECT COUNT(DISTINCT error_id) AS count - FROM errors - WHERE {" AND ".join(ch_sub_query)};""" - # print("------------") - # print(ch.client().substitute_params(main_ch_query, params)) - # print("------------") - total = ch.execute(query=main_ch_query, params=params)[0]["count"] - if flows: - return {"data": {"count": total}} - if total == 0: - rows = [] - else: - main_ch_query = f"""\ - SELECT details.error_id AS error_id, name, message, users, sessions, last_occurrence, first_occurrence, chart - FROM (SELECT error_id, - name, - message, - COUNT(DISTINCT user_uuid) AS users, - COUNT(DISTINCT session_id) AS sessions, - MAX(datetime) AS max_datetime, - MIN(datetime) AS min_datetime - FROM errors - WHERE {" AND ".join(ch_sub_query)} - GROUP BY error_id, name, message - ORDER BY {sort} {order} - LIMIT %(errors_limit)s OFFSET %(errors_offset)s) AS details - INNER JOIN (SELECT error_id AS error_id, toUnixTimestamp(MAX(datetime))*1000 AS last_occurrence, toUnixTimestamp(MIN(datetime))*1000 AS first_occurrence - FROM errors - GROUP BY error_id) AS time_details - ON details.error_id=time_details.error_id - 
INNER JOIN (SELECT error_id, groupArray([timestamp, count]) AS chart - FROM (SELECT error_id, toUnixTimestamp(toStartOfInterval(datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, - COUNT(DISTINCT session_id) AS count - FROM errors - WHERE {" AND ".join(ch_sub_query)} - GROUP BY error_id, timestamp - ORDER BY timestamp) AS sub_table - GROUP BY error_id) AS chart_details ON details.error_id=chart_details.error_id;""" + SELECT details.error_id AS error_id, + name, message, users, total, viewed, + sessions, last_occurrence, first_occurrence, chart + FROM (SELECT error_id, + name, + message, + COUNT(DISTINCT user_id) AS users, + COUNT(DISTINCT events.session_id) AS sessions, + MAX(datetime) AS max_datetime, + MIN(datetime) AS min_datetime, + COUNT(DISTINCT events.error_id) OVER() AS total, + any(isNotNull(viewed_error_id)) AS viewed + FROM {MAIN_EVENTS_TABLE} AS events + LEFT JOIN (SELECT error_id AS viewed_error_id + FROM final.user_viewed_errors + WHERE project_id=%(project_id)s + AND user_id=%(userId)s) AS viewed_errors ON(events.error_id=viewed_errors.viewed_error_id) + INNER JOIN (SELECT session_id, coalesce(user_id,toString(user_uuid)) AS user_id + FROM {MAIN_SESSIONS_TABLE} AS s + {subquery_part} + WHERE {" AND ".join(ch_sessions_sub_query)}) AS sessions + ON (events.session_id = sessions.session_id) + WHERE {" AND ".join(ch_sub_query)} + GROUP BY error_id, name, message + ORDER BY {sort} {order} + LIMIT %(errors_limit)s OFFSET %(errors_offset)s) AS details + INNER JOIN (SELECT error_id AS error_id, + toUnixTimestamp(MAX(datetime))*1000 AS last_occurrence, + toUnixTimestamp(MIN(datetime))*1000 AS first_occurrence + FROM {MAIN_EVENTS_TABLE} + WHERE project_id=%(project_id)s + AND event_type='ERROR' + GROUP BY error_id) AS time_details + ON details.error_id=time_details.error_id + INNER JOIN (SELECT error_id, groupArray([timestamp, count]) AS chart + FROM (SELECT error_id, toUnixTimestamp(toStartOfInterval(datetime, INTERVAL %(step_size)s second)) * 
1000 AS timestamp, + COUNT(DISTINCT session_id) AS count + FROM {MAIN_EVENTS_TABLE} + WHERE {" AND ".join(ch_sub_query)} + GROUP BY error_id, timestamp + ORDER BY timestamp) AS sub_table + GROUP BY error_id) AS chart_details ON details.error_id=chart_details.error_id;""" - # print("------------") - # print(ch.client().substitute_params(main_ch_query, params)) - # print("------------") + print("------------") + print(ch.format(main_ch_query, params)) + print("------------") - rows = ch.execute(query=main_ch_query, params=params) - if len(statuses) == 0: - query = cur.mogrify( - """SELECT error_id, status, parent_error_id, payload, - COALESCE((SELECT TRUE - FROM public.user_favorite_errors AS fe - WHERE errors.error_id = fe.error_id - AND fe.user_id = %(userId)s LIMIT 1), FALSE) AS favorite, - COALESCE((SELECT TRUE - FROM public.user_viewed_errors AS ve - WHERE errors.error_id = ve.error_id - AND ve.user_id = %(userId)s LIMIT 1), FALSE) AS viewed - FROM public.errors - WHERE project_id = %(project_id)s AND error_id IN %(error_ids)s;""", - {"project_id": project_id, "error_ids": tuple([r["error_id"] for r in rows]), - "userId": user_id}) - cur.execute(query=query) - statuses = helper.list_to_camel_case(cur.fetchall()) - statuses = { - s["errorId"]: s for s in statuses - } + rows = ch.execute(query=main_ch_query, params=params) + total = rows[0]["total"] if len(rows) > 0 else 0 for r in rows: - if r["error_id"] in statuses: - r["status"] = statuses[r["error_id"]]["status"] - r["parent_error_id"] = statuses[r["error_id"]]["parentErrorId"] - r["favorite"] = statuses[r["error_id"]]["favorite"] - r["viewed"] = statuses[r["error_id"]]["viewed"] - r["stack"] = format_first_stack_frame(statuses[r["error_id"]])["stack"] - else: - r["status"] = "untracked" - r["parent_error_id"] = None - r["favorite"] = False - r["viewed"] = False - r["stack"] = None - r["chart"] = list(r["chart"]) for i in range(len(r["chart"])): r["chart"][i] = {"timestamp": r["chart"][i][0], "count": 
r["chart"][i][1]} r["chart"] = metrics.__complete_missing_steps(rows=r["chart"], start_time=data.startDate, end_time=data.endDate, density=data.density, neutral={"count": 0}) - offset = len(rows) - rows = [r for r in rows if r["stack"] is None - or (len(r["stack"]) == 0 or len(r["stack"]) > 1 - or len(r["stack"]) > 0 - and (r["message"].lower() != "script error." or len(r["stack"][0]["absPath"]) > 0))] - offset -= len(rows) return { - "data": { - 'total': total - offset, - 'errors': helper.list_to_camel_case(rows) - } + 'total': total, + 'errors': helper.list_to_camel_case(rows) } diff --git a/ee/api/chalicelib/core/errors_viewed.py b/ee/api/chalicelib/core/errors_viewed.py new file mode 100644 index 000000000..f66e10d90 --- /dev/null +++ b/ee/api/chalicelib/core/errors_viewed.py @@ -0,0 +1,39 @@ +from chalicelib.utils import pg_client +from chalicelib.core import errors_viewed_exp + + +def add_viewed_error(project_id, user_id, error_id): + with pg_client.PostgresClient() as cur: + cur.execute( + cur.mogrify("""INSERT INTO public.user_viewed_errors(user_id, error_id) + VALUES (%(userId)s,%(error_id)s);""", + {"userId": user_id, "error_id": error_id}) + ) + errors_viewed_exp.add_viewed_error(project_id=project_id, user_id=user_id, error_id=error_id) + + +def viewed_error_exists(user_id, error_id): + with pg_client.PostgresClient() as cur: + query = cur.mogrify( + """SELECT + errors.error_id AS hydrated, + COALESCE((SELECT TRUE + FROM public.user_viewed_errors AS ve + WHERE ve.error_id = %(error_id)s + AND ve.user_id = %(userId)s LIMIT 1), FALSE) AS viewed + FROM public.errors + WHERE error_id = %(error_id)s""", + {"userId": user_id, "error_id": error_id}) + cur.execute( + query=query + ) + r = cur.fetchone() + if r: + return r.get("viewed") + return True + + +def viewed_error(project_id, user_id, error_id): + if viewed_error_exists(user_id=user_id, error_id=error_id): + return None + return add_viewed_error(project_id=project_id, user_id=user_id, error_id=error_id) 
diff --git a/ee/api/chalicelib/core/errors_viewed_exp.py b/ee/api/chalicelib/core/errors_viewed_exp.py new file mode 100644 index 000000000..7a2a6ddc5 --- /dev/null +++ b/ee/api/chalicelib/core/errors_viewed_exp.py @@ -0,0 +1,15 @@ +import logging + +from decouple import config + +from chalicelib.utils import ch_client, exp_ch_helper + +logging.basicConfig(level=config("LOGLEVEL", default=logging.INFO)) + + +def add_viewed_error(project_id, user_id, error_id): + with ch_client.ClickHouseClient() as cur: + query = f"""INSERT INTO {exp_ch_helper.get_user_viewed_errors_table()}(project_id,user_id, error_id) + VALUES (%(project_id)s,%(userId)s,%(error_id)s);""" + params = {"userId": user_id, "error_id": error_id, "project_id": project_id} + cur.execute(query=query, params=params) diff --git a/ee/api/chalicelib/core/metrics.py b/ee/api/chalicelib/core/metrics.py index 19977b0bf..62a1fbb27 100644 --- a/ee/api/chalicelib/core/metrics.py +++ b/ee/api/chalicelib/core/metrics.py @@ -167,9 +167,8 @@ def get_processed_sessions(project_id, startTimestamp=TimeUTC.now(delta_days=-1) ch_sub_query_chart += meta_condition with ch_client.ClickHouseClient() as ch: ch_query = f"""\ - SELECT - toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, - COUNT(sessions.session_id) AS value + SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COUNT(DISTINCT sessions.session_id) AS value FROM sessions {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query_chart)} GROUP BY timestamp @@ -191,7 +190,7 @@ def get_processed_sessions(project_id, startTimestamp=TimeUTC.now(delta_days=-1) endTimestamp = startTimestamp startTimestamp = endTimestamp - diff - ch_query = f""" SELECT COUNT(sessions.session_id) AS count + ch_query = f""" SELECT COUNT(1) AS count FROM sessions {"INNER JOIN sessions_metadata USING(session_id)" if 
len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)};""" params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, @@ -278,7 +277,7 @@ def get_errors_trend(project_id, startTimestamp=TimeUTC.now(delta_days=-1), ch_query = f"""SELECT * FROM (SELECT errors.error_id AS error_id, errors.message AS error, - COUNT(errors.session_id) AS count, + COUNT(1) AS count, COUNT(DISTINCT errors.session_id) AS sessions FROM errors {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} @@ -293,7 +292,7 @@ def get_errors_trend(project_id, startTimestamp=TimeUTC.now(delta_days=-1), "endTimestamp": endTimestamp, **__get_constraint_values(args)} rows = ch.execute(query=ch_query, params=params) - print(f"got {len(rows)} rows") + # print(f"got {len(rows)} rows") if len(rows) == 0: return [] error_ids = [r["error_id"] for r in rows] @@ -302,7 +301,7 @@ def get_errors_trend(project_id, startTimestamp=TimeUTC.now(delta_days=-1), for error_id in error_ids: ch_query = f"""\ SELECT toUnixTimestamp(toStartOfInterval(errors.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, - COUNT(errors.session_id) AS count + COUNT(1) AS count FROM errors {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query_chart)} GROUP BY timestamp @@ -461,11 +460,11 @@ def get_slowest_images(project_id, startTimestamp=TimeUTC.now(delta_days=-1), with ch_client.ClickHouseClient() as ch: ch_query = f"""SELECT resources.url, COALESCE(avgOrNull(resources.duration),0) AS avg, - COUNT(resources.session_id) AS count + COUNT(1) AS count FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} AND resources.duration>0 GROUP BY resources.url ORDER BY avg DESC LIMIT 10;""" - params = {"project_id": project_id, "startTimestamp": startTimestamp, + params = 
{"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, **__get_constraint_values(args)} rows = ch.execute(query=ch_query, params=params) @@ -482,8 +481,7 @@ def get_slowest_images(project_id, startTimestamp=TimeUTC.now(delta_days=-1), WHERE {" AND ".join(ch_sub_query_chart)} AND resources.duration>0 GROUP BY url, timestamp ORDER BY url, timestamp;""" - params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, "url": urls, **__get_constraint_values(args)} + params["url"] = urls u_rows = ch.execute(query=ch_query, params=params) for url in urls: sub_rows = [] @@ -783,27 +781,28 @@ def get_missing_resources_trend(project_id, startTimestamp=TimeUTC.now(delta_day step_size = __get_step_size(startTimestamp, endTimestamp, density) ch_sub_query = __get_basic_constraints(table_name="resources", data=args) ch_sub_query.append("resources.success = 0") - ch_sub_query.append("resources.type != 'fetch'") + ch_sub_query.append("resources.type = 'img'") meta_condition = __get_meta_constraint(args) ch_sub_query += meta_condition with ch_client.ClickHouseClient() as ch: ch_query = f"""SELECT resources.url_hostpath AS key, - COUNT(resources.session_id) AS doc_count + COUNT(1) AS doc_count FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY url_hostpath ORDER BY doc_count DESC LIMIT 10;""" - rows = ch.execute(query=ch_query, params={"project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + params = {"project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) rows = [{"url": i["key"], "sessions": i["doc_count"]} for i in rows] if len(rows) == 0: return [] 
ch_sub_query.append("resources.url_hostpath = %(value)s") ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, - COUNT(resources.session_id) AS doc_count, + COUNT(1) AS doc_count, toUnixTimestamp(MAX(resources.datetime))*1000 AS max_datatime FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} @@ -813,13 +812,8 @@ def get_missing_resources_trend(project_id, startTimestamp=TimeUTC.now(delta_day e["startedAt"] = startTimestamp e["startTimestamp"] = startTimestamp e["endTimestamp"] = endTimestamp - - r = ch.execute(query=ch_query, - params={"step_size": step_size, "project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, - "value": e["url"], - **__get_constraint_values(args)}) + params["value"] = e["url"] + r = ch.execute(query=ch_query, params=params) e["endedAt"] = r[-1]["max_datatime"] e["chart"] = [{"timestamp": i["timestamp"], "count": i["doc_count"]} for i in @@ -840,15 +834,16 @@ def get_network(project_id, startTimestamp=TimeUTC.now(delta_days=-1), with ch_client.ClickHouseClient() as ch: ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, - resources.url_hostpath, COUNT(resources.session_id) AS doc_count + resources.url_hostpath, COUNT(1) AS doc_count FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query_chart)} GROUP BY timestamp, resources.url_hostpath - ORDER BY timestamp;""" - r = ch.execute(query=ch_query, - params={"step_size": step_size, "project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + ORDER BY timestamp, doc_count DESC + LIMIT 10 BY timestamp;""" + params = {"step_size": step_size, "project_id": project_id, + "startTimestamp": 
startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + r = ch.execute(query=ch_query, params=params) results = [] @@ -956,6 +951,7 @@ def get_slowest_resources(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), type="all", density=19, **args): step_size = __get_step_size(startTimestamp, endTimestamp, density) ch_sub_query = __get_basic_constraints(table_name="resources", data=args) + ch_sub_query.append("isNotNull(resources.url_hostpath)") ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args) meta_condition = __get_meta_constraint(args) ch_sub_query += meta_condition @@ -1025,15 +1021,15 @@ def get_sessions_location(project_id, startTimestamp=TimeUTC.now(delta_days=-1), ch_sub_query += meta_condition with ch_client.ClickHouseClient() as ch: - ch_query = f"""SELECT user_country, COUNT(session_id) AS count + ch_query = f"""SELECT user_country, COUNT(1) AS count FROM sessions {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY user_country ORDER BY user_country;""" - rows = ch.execute(query=ch_query, - params={"project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) return {"count": sum(i["count"] for i in rows), "chart": helper.list_to_camel_case(rows)} @@ -1108,30 +1104,24 @@ def get_pages_response_time_distribution(project_id, startTimestamp=TimeUTC.now( with ch_client.ClickHouseClient() as ch: ch_query = f"""SELECT pages.response_time AS response_time, - COUNT(pages.session_id) AS count + COUNT(1) AS count FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND 
".join(ch_sub_query)} GROUP BY response_time ORDER BY response_time;""" - rows = ch.execute(query=ch_query, - params={"project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) ch_query = f"""SELECT COALESCE(avgOrNull(pages.response_time),0) AS avg FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)};""" - avg = ch.execute(query=ch_query, - params={"project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)})[0]["avg"] + avg = ch.execute(query=ch_query, params=params)[0]["avg"] quantiles_keys = [50, 90, 95, 99] ch_query = f"""SELECT quantilesExact({",".join([str(i / 100) for i in quantiles_keys])})(pages.response_time) AS values FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)};""" - quantiles = ch.execute(query=ch_query, - params={"project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + quantiles = ch.execute(query=ch_query, params=params) result = { "value": avg, "total": sum(r["count"] for r in rows), @@ -1228,15 +1218,15 @@ def get_busiest_time_of_day(project_id, startTimestamp=TimeUTC.now(delta_days=-1 with ch_client.ClickHouseClient() as ch: ch_query = f"""SELECT intDiv(toHour(sessions.datetime),2)*2 AS hour, - COUNT(sessions.session_id) AS count + COUNT(1) AS count FROM sessions {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY hour ORDER BY hour ASC;""" - rows = ch.execute(query=ch_query, - params={"project_id": project_id, - 
"startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) return __complete_missing_steps(rows=rows, start_time=0, end_time=24000, density=12, neutral={"count": 0}, time_key="hour", time_coefficient=1) @@ -1251,17 +1241,24 @@ def get_top_metrics(project_id, startTimestamp=TimeUTC.now(delta_days=-1), if value is not None: ch_sub_query.append("pages.url_path = %(value)s") with ch_client.ClickHouseClient() as ch: - ch_query = f"""SELECT (SELECT COALESCE(avgOrNull(pages.response_time),0) FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} AND isNotNull(pages.response_time) AND pages.response_time>0) AS avg_response_time, - (SELECT COUNT(pages.session_id) FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)}) AS count_requests, - (SELECT COALESCE(avgOrNull(pages.first_paint),0) FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} AND isNotNull(pages.first_paint) AND pages.first_paint>0) AS avg_first_paint, - (SELECT COALESCE(avgOrNull(pages.dom_content_loaded_event_time),0) FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} AND isNotNull(pages.dom_content_loaded_event_time) AND pages.dom_content_loaded_event_time>0) AS avg_dom_content_loaded, - (SELECT COALESCE(avgOrNull(pages.ttfb),0) FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} AND isNotNull(pages.ttfb) AND pages.ttfb>0) AS avg_till_first_bit, - (SELECT 
COALESCE(avgOrNull(pages.time_to_interactive),0) FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} AND isNotNull(pages.time_to_interactive) AND pages.time_to_interactive >0) AS avg_time_to_interactive;""" - rows = ch.execute(query=ch_query, - params={"project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, - "value": value, **__get_constraint_values(args)}) + ch_query = f"""SELECT COALESCE(avgOrNull(if(pages.response_time>0,pages.response_time,null)),0) AS avg_response_time, + COALESCE(avgOrNull(if(pages.first_paint>0,pages.first_paint,null)),0) AS avg_first_paint, + COALESCE(avgOrNull(if(pages.dom_content_loaded_event_time>0,pages.dom_content_loaded_event_time,null)),0) AS avg_dom_content_loaded, + COALESCE(avgOrNull(if(pages.ttfb>0,pages.ttfb,null)),0) AS avg_till_first_bit, + COALESCE(avgOrNull(if(pages.time_to_interactive>0,pages.time_to_interactive,null)),0) AS avg_time_to_interactive, + (SELECT COUNT(1) FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)}) AS count_requests + FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} + WHERE {" AND ".join(ch_sub_query)} + AND (isNotNull(pages.response_time) AND pages.response_time>0 OR + isNotNull(pages.first_paint) AND pages.first_paint>0 OR + isNotNull(pages.dom_content_loaded_event_time) AND pages.dom_content_loaded_event_time>0 OR + isNotNull(pages.ttfb) AND pages.ttfb>0 OR + isNotNull(pages.time_to_interactive) AND pages.time_to_interactive >0);""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "value": value, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) return helper.dict_to_camel_case(rows[0]) @@ -1461,17 +1458,17 @@ def get_crashes(project_id, 
startTimestamp=TimeUTC.now(delta_days=-1), with ch_client.ClickHouseClient() as ch: ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, - COUNT(sessions.session_id) AS value + COUNT(1) AS value FROM sessions {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query_chart)} GROUP BY timestamp ORDER BY timestamp;""" - rows = ch.execute(query=ch_query, - params={"step_size": step_size, - "project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, - "session_ids": session_ids, **__get_constraint_values(args)}) + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "session_ids": session_ids, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) ch_query = f"""SELECT b.user_browser AS browser, sum(bv.count) AS total, groupArray([bv.user_browser_version, toString(bv.count)]) AS versions @@ -1480,14 +1477,14 @@ def get_crashes(project_id, startTimestamp=TimeUTC.now(delta_days=-1), FROM sessions {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY sessions.user_browser - ORDER BY COUNT(sessions.session_id) DESC + ORDER BY COUNT(1) DESC LIMIT 3 ) AS b INNER JOIN ( SELECT sessions.user_browser, sessions.user_browser_version, - COUNT(sessions.session_id) AS count + COUNT(1) AS count FROM sessions {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY sessions.user_browser, @@ -1496,12 +1493,7 @@ def get_crashes(project_id, startTimestamp=TimeUTC.now(delta_days=-1), ) AS bv USING (user_browser) GROUP BY b.user_browser ORDER BY b.user_browser;""" - browsers = ch.execute(query=ch_query, - params={"step_size": step_size, - "project_id": project_id, - 
"startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, - "session_ids": session_ids, **__get_constraint_values(args)}) + browsers = ch.execute(query=ch_query, params=params) total = sum(r["total"] for r in browsers) for r in browsers: r["percentage"] = r["total"] / (total / 100) @@ -1546,12 +1538,12 @@ def get_domains_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), ch_query = f"""SELECT timestamp, groupArray([domain, toString(count)]) AS keys FROM (SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, - resources.url_host AS domain, COUNT(resources.session_id) AS count + resources.url_host AS domain, COUNT(1) AS count FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY timestamp,resources.url_host ORDER BY timestamp, count DESC - LIMIT 5) AS domain_stats + LIMIT 5 BY timestamp) AS domain_stats GROUP BY timestamp;""" params = {"project_id": project_id, "startTimestamp": startTimestamp, @@ -1577,8 +1569,8 @@ def get_domains_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), return result -def get_domains_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), - endTimestamp=TimeUTC.now(), density=6, **args): +def __get_domains_errors_4xx_and_5xx(status, project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=6, **args): step_size = __get_step_size(startTimestamp, endTimestamp, density) ch_sub_query = __get_basic_constraints(table_name="resources", round_start=True, data=args) ch_sub_query.append("intDiv(resources.status, 100) == %(status_code)s") @@ -1589,18 +1581,18 @@ def get_domains_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1) ch_query = f"""SELECT timestamp, groupArray([domain, toString(count)]) AS keys FROM (SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second)) * 1000 AS 
timestamp, - resources.url_host AS domain, COUNT(resources.session_id) AS count + resources.url_host AS domain, COUNT(1) AS count FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY timestamp,resources.url_host ORDER BY timestamp, count DESC - LIMIT 5) AS domain_stats + LIMIT 5 BY timestamp) AS domain_stats GROUP BY timestamp;""" params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, "step_size": step_size, - "status_code": 4, **__get_constraint_values(args)} + "status_code": status, **__get_constraint_values(args)} rows = ch.execute(query=ch_query, params=params) rows = __nested_array_to_dict_array(rows) neutral = __get_domains_errors_neutral(rows) @@ -1611,38 +1603,16 @@ def get_domains_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1) density=density, neutral=neutral) +def get_domains_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=6, **args): + return __get_domains_errors_4xx_and_5xx(status=4, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, density=density, **args) + + def get_domains_errors_5xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), density=6, **args): - step_size = __get_step_size(startTimestamp, endTimestamp, density) - ch_sub_query = __get_basic_constraints(table_name="resources", round_start=True, data=args) - ch_sub_query.append("intDiv(resources.status, 100) == %(status_code)s") - meta_condition = __get_meta_constraint(args) - ch_sub_query += meta_condition - - with ch_client.ClickHouseClient() as ch: - ch_query = f"""SELECT timestamp, - groupArray([domain, toString(count)]) AS keys - FROM (SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, - resources.url_host AS domain, COUNT(resources.session_id) AS count - 
FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} - WHERE {" AND ".join(ch_sub_query)} - GROUP BY timestamp,resources.url_host - ORDER BY timestamp, count DESC - LIMIT 5) AS domain_stats - GROUP BY timestamp;""" - params = {"project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, - "step_size": step_size, - "status_code": 5, **__get_constraint_values(args)} - rows = ch.execute(query=ch_query, params=params) - rows = __nested_array_to_dict_array(rows) - neutral = __get_domains_errors_neutral(rows) - rows = __merge_rows_with_neutral(rows, neutral) - - return __complete_missing_steps(rows=rows, start_time=startTimestamp, - end_time=endTimestamp, - density=density, neutral=neutral) + return __get_domains_errors_4xx_and_5xx(status=5, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, density=density, **args) def __nested_array_to_dict_array(rows): @@ -1690,16 +1660,16 @@ def get_errors_per_domains(project_id, startTimestamp=TimeUTC.now(delta_days=-1) with ch_client.ClickHouseClient() as ch: ch_query = f"""SELECT resources.url_host AS domain, - COUNT(resources.session_id) AS errors_count + COUNT(1) AS errors_count FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY resources.url_host ORDER BY errors_count DESC LIMIT 5;""" - rows = ch.execute(query=ch_query, - params={"project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) return helper.list_to_camel_case(rows) @@ -1716,7 +1686,7 @@ def get_sessions_per_browser(project_id, startTimestamp=TimeUTC.now(delta_days=- FROM ( SELECT sessions.user_browser, - 
COUNT(sessions.session_id) AS count + COUNT(1) AS count FROM sessions {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY sessions.user_browser @@ -1727,7 +1697,7 @@ def get_sessions_per_browser(project_id, startTimestamp=TimeUTC.now(delta_days=- ( SELECT sessions.user_browser, sessions.user_browser_version, - COUNT(sessions.session_id) AS count + COUNT(1) AS count FROM sessions {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY @@ -1739,10 +1709,10 @@ def get_sessions_per_browser(project_id, startTimestamp=TimeUTC.now(delta_days=- GROUP BY b.user_browser, b.count ORDER BY b.count DESC;""" - rows = ch.execute(query=ch_query, - params={"project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) for i, r in enumerate(rows): versions = {} for j in range(len(r["versions"])): @@ -1763,67 +1733,58 @@ def get_calls_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endT with ch_client.ClickHouseClient() as ch: ch_query = f"""SELECT resources.method, resources.url_hostpath, - COUNT(resources.session_id) AS all_requests, + COUNT(1) AS all_requests, SUM(if(intDiv(resources.status, 100) == 4, 1, 0)) AS _4xx, SUM(if(intDiv(resources.status, 100) == 5, 1, 0)) AS _5xx FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY resources.method, resources.url_hostpath - ORDER BY (_4xx + _5xx), all_requests DESC + ORDER BY (_4xx + _5xx) DESC, all_requests DESC LIMIT 50;""" - rows = ch.execute(query=ch_query, - params={"project_id": project_id, - "startTimestamp": startTimestamp, 
- "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + return helper.list_to_camel_case(rows) + + +def __get_calls_errors_4xx_or_5xx(status, project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), + platform=None, **args): + ch_sub_query = __get_basic_constraints(table_name="resources", data=args) + ch_sub_query.append("resources.type = 'fetch'") + ch_sub_query.append(f"intDiv(resources.status, 100) == {status}") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT resources.method, + resources.url_hostpath, + COUNT(1) AS all_requests + FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} + WHERE {" AND ".join(ch_sub_query)} + GROUP BY resources.method, resources.url_hostpath + ORDER BY all_requests DESC + LIMIT 10;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) return helper.list_to_camel_case(rows) def get_calls_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), platform=None, **args): - ch_sub_query = __get_basic_constraints(table_name="resources", data=args) - ch_sub_query.append("resources.type = 'fetch'") - ch_sub_query.append("intDiv(resources.status, 100) == 4") - meta_condition = __get_meta_constraint(args) - ch_sub_query += meta_condition - - with ch_client.ClickHouseClient() as ch: - ch_query = f"""SELECT resources.method, - resources.url_hostpath, - COUNT(resources.session_id) AS all_requests - FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} - 
WHERE {" AND ".join(ch_sub_query)} - GROUP BY resources.method, resources.url_hostpath - ORDER BY all_requests DESC - LIMIT 10;""" - rows = ch.execute(query=ch_query, - params={"project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) - return helper.list_to_camel_case(rows) + return __get_calls_errors_4xx_or_5xx(status=4, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, + platform=platform, **args) def get_calls_errors_5xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), platform=None, **args): - ch_sub_query = __get_basic_constraints(table_name="resources", data=args) - ch_sub_query.append("resources.type = 'fetch'") - ch_sub_query.append("intDiv(resources.status, 100) == 5") - meta_condition = __get_meta_constraint(args) - ch_sub_query += meta_condition - - with ch_client.ClickHouseClient() as ch: - ch_query = f"""SELECT resources.method, - resources.url_hostpath, - COUNT(resources.session_id) AS all_requests - FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} - WHERE {" AND ".join(ch_sub_query)} - GROUP BY resources.method, resources.url_hostpath - ORDER BY all_requests DESC - LIMIT 10;""" - rows = ch.execute(query=ch_query, - params={"project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) - return helper.list_to_camel_case(rows) + return __get_calls_errors_4xx_or_5xx(status=5, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, + platform=platform, **args) def get_errors_per_type(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), @@ -1866,15 +1827,11 @@ def get_errors_per_type(project_id, startTimestamp=TimeUTC.now(delta_days=-1), e "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, **__get_constraint_values(args)} rows = 
helper.list_to_camel_case(ch.execute(query=ch_query, params=params)) - for r in rows: - print(r) + return __complete_missing_steps(rows=rows, start_time=startTimestamp, end_time=endTimestamp, density=density, - neutral={"4xx": 0, - "5xx": 0, - "js": 0, - "integrations": 0}) + neutral={"4xx": 0, "5xx": 0, "js": 0, "integrations": 0}) def resource_type_vs_response_end(project_id, startTimestamp=TimeUTC.now(delta_days=-1), @@ -1894,7 +1851,7 @@ def resource_type_vs_response_end(project_id, startTimestamp=TimeUTC.now(delta_d "endTimestamp": endTimestamp, **__get_constraint_values(args)} with ch_client.ClickHouseClient() as ch: ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, - COUNT(resources.session_id) AS total, + COUNT(1) AS total, SUM(if(resources.type='fetch',1,0)) AS xhr FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query_chart)} @@ -1962,10 +1919,8 @@ def get_resources_vs_visually_complete(project_id, startTimestamp=TimeUTC.now(de endTimestamp=TimeUTC.now(), density=7, **args): step_size = __get_step_size(startTimestamp, endTimestamp, density) ch_sub_query = __get_basic_constraints(table_name="resources", data=args) - ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args) meta_condition = __get_meta_constraint(args) ch_sub_query += meta_condition - ch_sub_query_chart += meta_condition with ch_client.ClickHouseClient() as ch: ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(s.base_datetime, toIntervalSecond(%(step_size)s))) * 1000 AS timestamp, @@ -1974,27 +1929,27 @@ def get_resources_vs_visually_complete(project_id, startTimestamp=TimeUTC.now(de FROM ( SELECT resources.session_id, MIN(resources.datetime) AS base_datetime, - COUNT(resources.url) AS count + COUNT(1) AS count FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if 
len(meta_condition) > 0 else ""} - WHERE {" AND ".join(ch_sub_query_chart)} + WHERE {" AND ".join(ch_sub_query)} GROUP BY resources.session_id ) AS s INNER JOIN (SELECT session_id, type, COALESCE(avgOrNull(NULLIF(count,0)),0) AS xavg - FROM (SELECT resources.session_id, resources.type, COUNT(resources.url) AS count + FROM (SELECT resources.session_id, resources.type, COUNT(1) AS count FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY resources.session_id, resources.type) AS ss GROUP BY ss.session_id, ss.type) AS t USING (session_id) GROUP BY timestamp ORDER BY timestamp ASC;""" - rows = ch.execute(query=ch_query, - params={"step_size": step_size, - "project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) for r in rows: types = {} for i in range(len(r["types"])): @@ -2030,17 +1985,17 @@ def get_resources_count_by_type(project_id, startTimestamp=TimeUTC.now(delta_day groupArray([toString(t.type), toString(t.count)]) AS types FROM(SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, resources.type, - COUNT(resources.session_id) AS count + COUNT(1) AS count FROM resources {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query_chart)} GROUP BY timestamp,resources.type ORDER BY timestamp) AS t GROUP BY timestamp;""" - rows = ch.execute(query=ch_query, - params={"step_size": step_size, - "project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + params = {"step_size": step_size, + "project_id": 
project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) for r in rows: for t in r["types"]: r[t[0]] = t[1] @@ -2056,6 +2011,7 @@ def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1) step_size = __get_step_size(startTimestamp, endTimestamp, density) ch_sub_query = __get_basic_constraints(table_name="resources", round_start=True, data=args) ch_sub_query.append("resources.success = 0") + ch_sub_query.append("resources.type IN ('fetch','script')") sch_sub_query = ["rs.project_id =toUInt32(%(project_id)s)", "rs.type IN ('fetch','script')"] meta_condition = __get_meta_constraint(args) ch_sub_query += meta_condition @@ -2063,8 +2019,8 @@ def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1) with ch_client.ClickHouseClient() as ch: ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(sub_resources.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, - SUM(if(first.url_host = sub_resources.url_host, 1, 0)) AS first_party, - SUM(if(first.url_host = sub_resources.url_host, 0, 1)) AS third_party + SUM(first.url_host = sub_resources.url_host) AS first_party, + SUM(first.url_host != sub_resources.url_host) AS third_party FROM ( SELECT resources.datetime, resources.url_host @@ -2075,7 +2031,7 @@ def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1) ( SELECT rs.url_host, - COUNT(rs.session_id) AS count + COUNT(1) AS count FROM resources AS rs WHERE {" AND ".join(sch_sub_query)} GROUP BY rs.url_host @@ -2084,11 +2040,11 @@ def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1) ) AS first GROUP BY timestamp ORDER BY timestamp;""" - rows = ch.execute(query=ch_query, - params={"step_size": step_size, - "project_id": project_id, - "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + params = {"step_size": 
step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) return helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp, end_time=endTimestamp, density=density, @@ -2476,7 +2432,7 @@ def __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTim ch_sub_query += meta_condition ch_query = f"""SELECT COALESCE(CEIL(avgOrNull(count)),0) AS value - FROM (SELECT COUNT(session_id) AS count + FROM (SELECT COUNT(1) AS count FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)} GROUP BY session_id) AS groupped_data @@ -2496,10 +2452,10 @@ def __get_user_activity_avg_visited_pages_chart(ch, project_id, startTimestamp, ch_sub_query_chart += meta_condition params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, - "endTimestamp": endTimestamp} + "endTimestamp": endTimestamp, **__get_constraint_values(args)} ch_query = f"""SELECT timestamp, COALESCE(avgOrNull(count), 0) AS value FROM (SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, - session_id, COUNT(pages.session_id) AS count + session_id, COUNT(1) AS count FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query_chart)} GROUP BY timestamp,session_id @@ -2507,7 +2463,7 @@ def __get_user_activity_avg_visited_pages_chart(ch, project_id, startTimestamp, WHERE count>0 GROUP BY timestamp ORDER BY timestamp;""" - rows = ch.execute(query=ch_query, params={**params, **__get_constraint_values(args)}) + rows = ch.execute(query=ch_query, params=params) rows = __complete_missing_steps(rows=rows, start_time=startTimestamp, end_time=endTimestamp, density=density, neutral={"value": 0}) @@ -2604,11 +2560,11 @@ def 
get_top_metrics_avg_response_time(project_id, startTimestamp=TimeUTC.now(del rows = ch.execute(query=ch_query, params=params) results = rows[0] ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, - COUNT(pages.response_time) AS value - FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} - WHERE {" AND ".join(ch_sub_query_chart)} AND isNotNull(pages.response_time) AND pages.response_time>0 - GROUP BY timestamp - ORDER BY timestamp;""" + COALESCE(avgOrNull(pages.response_time),0) AS value + FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} + WHERE {" AND ".join(ch_sub_query_chart)} AND isNotNull(pages.response_time) AND pages.response_time>0 + GROUP BY timestamp + ORDER BY timestamp;""" rows = ch.execute(query=ch_query, params={**params, **__get_constraint_values(args)}) rows = __complete_missing_steps(rows=rows, start_time=startTimestamp, end_time=endTimestamp, @@ -2631,7 +2587,7 @@ def get_top_metrics_count_requests(project_id, startTimestamp=TimeUTC.now(delta_ ch_sub_query.append("pages.url_path = %(value)s") ch_sub_query_chart.append("pages.url_path = %(value)s") with ch_client.ClickHouseClient() as ch: - ch_query = f"""SELECT COUNT(pages.session_id) AS value + ch_query = f"""SELECT COUNT(1) AS value FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query)};""" params = {"step_size": step_size, "project_id": project_id, @@ -2641,7 +2597,7 @@ def get_top_metrics_count_requests(project_id, startTimestamp=TimeUTC.now(delta_ rows = ch.execute(query=ch_query, params=params) result = rows[0] ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, - COUNT(pages.session_id) AS value + COUNT(1) AS value FROM pages {"INNER JOIN sessions_metadata USING(session_id)" if 
len(meta_condition) > 0 else ""} WHERE {" AND ".join(ch_sub_query_chart)} GROUP BY timestamp diff --git a/ee/api/chalicelib/core/metrics_exp.py b/ee/api/chalicelib/core/metrics_exp.py new file mode 100644 index 000000000..958a335b9 --- /dev/null +++ b/ee/api/chalicelib/core/metrics_exp.py @@ -0,0 +1,2800 @@ +import math + +import schemas +from chalicelib.utils import pg_client, exp_ch_helper +from chalicelib.utils import args_transformer +from chalicelib.utils import helper +from chalicelib.utils.TimeUTC import TimeUTC +from chalicelib.utils import ch_client +from math import isnan +from chalicelib.utils.metrics_helper import __get_step_size + + +def __get_basic_constraints(table_name=None, time_constraint=True, round_start=False, data={}, identifier="project_id"): + if table_name: + table_name += "." + else: + table_name = "" + ch_sub_query = [f"{table_name}{identifier} =toUInt16(%({identifier})s)"] + if time_constraint: + if round_start: + ch_sub_query.append( + f"toStartOfInterval({table_name}datetime, INTERVAL %(step_size)s second) >= toDateTime(%(startTimestamp)s/1000)") + else: + ch_sub_query.append(f"{table_name}datetime >= toDateTime(%(startTimestamp)s/1000)") + ch_sub_query.append(f"{table_name}datetime < toDateTime(%(endTimestamp)s/1000)") + return ch_sub_query + __get_generic_constraint(data=data, table_name=table_name) + + +def __frange(start, stop, step): + result = [] + i = start + while i < stop: + result.append(i) + i += step + return result + + +def __add_missing_keys(original, complete): + for missing in [key for key in complete.keys() if key not in original.keys()]: + original[missing] = complete[missing] + return original + + +def __complete_missing_steps(start_time, end_time, density, neutral, rows, time_key="timestamp", time_coefficient=1000): + if len(rows) == density: + return rows + step = __get_step_size(start_time, end_time, density, decimal=True) + optimal = [(int(i * time_coefficient), int((i + step) * time_coefficient)) for i in + 
__frange(start_time // time_coefficient, end_time // time_coefficient, step)] + result = [] + r = 0 + o = 0 + for i in range(density): + neutral_clone = dict(neutral) + for k in neutral_clone.keys(): + if callable(neutral_clone[k]): + neutral_clone[k] = neutral_clone[k]() + if r < len(rows) and len(result) + len(rows) - r == density: + result += rows[r:] + break + if r < len(rows) and o < len(optimal) and rows[r][time_key] < optimal[o][0]: + # complete missing keys in original object + rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone) + result.append(rows[r]) + r += 1 + elif r < len(rows) and o < len(optimal) and optimal[o][0] <= rows[r][time_key] < optimal[o][1]: + # complete missing keys in original object + rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone) + result.append(rows[r]) + r += 1 + o += 1 + else: + neutral_clone[time_key] = optimal[o][0] + result.append(neutral_clone) + o += 1 + # elif r < len(rows) and rows[r][time_key] >= optimal[o][1]: + # neutral_clone[time_key] = optimal[o][0] + # result.append(neutral_clone) + # o += 1 + # else: + # neutral_clone[time_key] = optimal[o][0] + # result.append(neutral_clone) + # o += 1 + return result + + +def __merge_charts(list1, list2, time_key="timestamp"): + if len(list1) != len(list2): + raise Exception("cannot merge unequal lists") + result = [] + for i in range(len(list1)): + timestamp = min(list1[i][time_key], list2[i][time_key]) + result.append({**list1[i], **list2[i], time_key: timestamp}) + return result + + +def __get_constraint(data, fields, table_name): + constraints = [] + # for k in fields.keys(): + for i, f in enumerate(data.get("filters", [])): + if f["key"] in fields.keys(): + if f["value"] in ["*", ""]: + constraints.append(f"isNotNull({table_name}{fields[f['key']]})") + else: + constraints.append(f"{table_name}{fields[f['key']]} = %({f['key']}_{i})s") + # TODO: remove this in next release + offset = len(data.get("filters", [])) + for i, f in 
enumerate(data.keys()): + if f in fields.keys(): + if data[f] in ["*", ""]: + constraints.append(f"isNotNull({table_name}{fields[f]})") + else: + constraints.append(f"{table_name}{fields[f]} = %({f}_{i + offset})s") + return constraints + + +def __get_constraint_values(data): + params = {} + for i, f in enumerate(data.get("filters", [])): + params[f"{f['key']}_{i}"] = f["value"] + + # TODO: remove this in next release + offset = len(data.get("filters", [])) + for i, f in enumerate(data.keys()): + params[f"{f}_{i + offset}"] = data[f] + return params + + +METADATA_FIELDS = {"userId": "user_id", + "userAnonymousId": "user_anonymous_id", + "metadata1": "metadata_1", + "metadata2": "metadata_2", + "metadata3": "metadata_3", + "metadata4": "metadata_4", + "metadata5": "metadata_5", + "metadata6": "metadata_6", + "metadata7": "metadata_7", + "metadata8": "metadata_8", + "metadata9": "metadata_9", + "metadata10": "metadata_10"} + + +def __get_meta_constraint(data): + return __get_constraint(data=data, fields=METADATA_FIELDS, table_name="sessions_metadata.") + + +SESSIONS_META_FIELDS = {"revId": "rev_id", + "country": "user_country", + "os": "user_os", + "platform": "user_device_type", + "device": "user_device", + "browser": "user_browser"} + + +def __get_generic_constraint(data, table_name): + return __get_constraint(data=data, fields=SESSIONS_META_FIELDS, table_name=table_name) + + +def get_processed_sessions(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), + density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query = __get_basic_constraints(table_name="sessions", data=args) + ch_sub_query_chart = __get_basic_constraints(table_name="sessions", round_start=True, data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + ch_sub_query_chart += meta_condition + with ch_client.ClickHouseClient() as ch: + ch_query = f"""\ + SELECT 
def get_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(),
               density=7, **args):
    """Errors widget: distinct JS-exception count, impacted sessions per time
    bucket, and the progress ratio against the previous period.

    :param args: extra filters forwarded to __get_basic_constraints /
                 __get_meta_constraint; their values must also be forwarded to
                 every query that joins the resulting constraint lists.
    """
    step_size = __get_step_size(startTimestamp, endTimestamp, density)

    ch_sub_query = __get_basic_constraints(table_name="errors", data=args)
    ch_sub_query.append("errors.event_type = 'ERROR'")
    ch_sub_query.append("errors.source = 'js_exception'")
    ch_sub_query_chart = __get_basic_constraints(table_name="errors", round_start=True, data=args)
    ch_sub_query_chart.append("errors.event_type = 'ERROR'")
    ch_sub_query_chart.append("errors.source = 'js_exception'")

    meta_condition = __get_meta_constraint(args)
    ch_sub_query += meta_condition
    ch_sub_query_chart += meta_condition

    with ch_client.ClickHouseClient() as ch:
        ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(errors.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
                              COUNT(DISTINCT errors.session_id) AS count
                       FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors
                       WHERE {" AND ".join(ch_sub_query_chart)}
                       GROUP BY timestamp
                       ORDER BY timestamp;"""
        params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
        rows = ch.execute(query=ch_query, params=params)
        results = {
            # FIX: forward **args so that placeholders generated by the meta /
            # generic filters inside ch_sub_query get their values bound
            # (previously the first call dropped them, breaking filtered requests).
            "count": 0 if len(rows) == 0 else __count_distinct_errors(ch, project_id, startTimestamp, endTimestamp,
                                                                      ch_sub_query, **args),
            "impactedSessions": sum([r["count"] for r in rows]),
            "chart": __complete_missing_steps(rows=rows, start_time=startTimestamp, end_time=endTimestamp,
                                              density=density,
                                              neutral={"count": 0})
        }

        # Shift the whole window one period back to compute the progress delta.
        diff = endTimestamp - startTimestamp
        endTimestamp = startTimestamp
        startTimestamp = endTimestamp - diff
        count = __count_distinct_errors(ch, project_id, startTimestamp, endTimestamp, ch_sub_query,
                                        meta=len(meta_condition) > 0, **args)
        results["progress"] = helper.__progress(old_val=count, new_val=results["count"])
    return results


def __count_distinct_errors(ch, project_id, startTimestamp, endTimestamp, ch_sub_query, meta=False, **args):
    """Count distinct error messages matching the prebuilt constraint list.

    :param meta: unused here; kept for call-site compatibility — TODO confirm
                 whether it can be dropped from callers.
    :return: the count, or 0 when the query yields no rows.
    """
    ch_query = f"""SELECT COUNT(DISTINCT errors.message) AS count
                   FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors
                   WHERE {" AND ".join(ch_sub_query)};"""
    count = ch.execute(query=ch_query,
                       params={"project_id": project_id, "startTimestamp": startTimestamp,
                               "endTimestamp": endTimestamp, **__get_constraint_values(args)})

    if count is not None and len(count) > 0:
        return count[0]["count"]

    return 0
def get_errors_trend(project_id,
                     startTimestamp=TimeUTC.now(delta_days=-1),
                     endTimestamp=TimeUTC.now(),
                     density=7, **args):
    """Top-10 errors by impacted sessions, each with first/last occurrence and
    a per-bucket occurrence chart."""
    step_size = __get_step_size(startTimestamp, endTimestamp, density)
    ch_sub_query = __get_basic_constraints(table_name="errors", data=args)
    ch_sub_query.append("errors.event_type='ERROR'")
    ch_sub_query_chart = __get_basic_constraints(table_name="errors", round_start=True, data=args)
    ch_sub_query_chart.append("errors.event_type='ERROR'")
    meta_condition = __get_meta_constraint(args)
    ch_sub_query += meta_condition
    ch_sub_query_chart += meta_condition

    with ch_client.ClickHouseClient() as ch:
        ch_query = f"""SELECT *
                       FROM (SELECT errors.error_id AS error_id,
                                    errors.message AS error,
                                    COUNT(1) AS count,
                                    COUNT(DISTINCT errors.session_id) AS sessions
                             FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors
                             WHERE {" AND ".join(ch_sub_query)}
                             GROUP BY errors.error_id, errors.message) AS errors_chart
                            INNER JOIN (SELECT error_id AS error_id,
                                               toUnixTimestamp(MAX(datetime))*1000 AS lastOccurrenceAt,
                                               toUnixTimestamp(MIN(datetime))*1000 AS firstOccurrenceAt
                                        FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors
                                        WHERE event_type='ERROR' AND project_id=%(project_id)s
                                        GROUP BY error_id) AS errors_time USING(error_id)
                       ORDER BY sessions DESC, count DESC LIMIT 10;"""
        params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
        rows = ch.execute(query=ch_query, params=params)

        if len(rows) == 0:
            return []
        error_ids = [r["error_id"] for r in rows]
        # FIX: the per-error chart below joins ch_sub_query_chart, so the
        # error_id filter must be appended there (it was previously appended to
        # ch_sub_query, leaving every chart identical and unfiltered).
        ch_sub_query_chart.append("error_id = %(error_id)s")
        errors = {}
        for error_id in error_ids:
            ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(errors.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
                                  COUNT(1) AS count
                           FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors
                           WHERE {" AND ".join(ch_sub_query_chart)}
                           GROUP BY timestamp
                           ORDER BY timestamp;"""
            params["error_id"] = error_id
            errors[error_id] = ch.execute(query=ch_query, params=params)

        for row in rows:
            row["startTimestamp"] = startTimestamp
            row["endTimestamp"] = endTimestamp
            row["chart"] = __complete_missing_steps(rows=errors[row["error_id"]], start_time=startTimestamp,
                                                    end_time=endTimestamp,
                                                    density=density,
                                                    neutral={"count": 0})

    return rows


def get_page_metrics(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
                     endTimestamp=TimeUTC.now(), **args):
    """Average DOM-content-loaded and first-contentful-paint times, with
    "<key>Progress" ratios against the previous period."""
    # FIX: initialise results so an empty first result set no longer raises
    # NameError at the return statement.
    results = {}
    with ch_client.ClickHouseClient() as ch:
        rows = __get_page_metrics(ch, project_id, startTimestamp, endTimestamp, **args)
        if len(rows) > 0:
            results = helper.dict_to_camel_case(rows[0])
            # Shift the whole window one period back for the progress delta.
            diff = endTimestamp - startTimestamp
            endTimestamp = startTimestamp
            startTimestamp = endTimestamp - diff
            rows = __get_page_metrics(ch, project_id, startTimestamp, endTimestamp, **args)
            if len(rows) > 0:
                previous = helper.dict_to_camel_case(rows[0])
                for key in previous.keys():
                    results[key + "Progress"] = helper.__progress(old_val=previous[key], new_val=results[key])
    return results


def __get_page_metrics(ch, project_id, startTimestamp, endTimestamp, **args):
    """Single-row aggregate of dom_content_loaded_event_end and
    first_contentful_paint_time over LOCATION events."""
    ch_sub_query = __get_basic_constraints(table_name="pages", data=args)
    ch_sub_query.append("pages.event_type='LOCATION'")
    meta_condition = __get_meta_constraint(args)
    ch_sub_query += meta_condition
    ch_sub_query.append("(pages.dom_content_loaded_event_end>0 OR pages.first_contentful_paint_time>0)")
    # changed dom_content_loaded_event_start to dom_content_loaded_event_end
    ch_query = f"""SELECT COALESCE(avgOrNull(NULLIF(pages.dom_content_loaded_event_end ,0)),0) AS avg_dom_content_load_start,
                          COALESCE(avgOrNull(NULLIF(pages.first_contentful_paint_time,0)),0) AS avg_first_contentful_pixel
                   FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
                   WHERE {" AND ".join(ch_sub_query)};"""
    # NOTE(review): the "type" param looks like a leftover — no %(type)s
    # placeholder exists in this query; kept for safety.
    params = {"project_id": project_id, "type": 'fetch', "startTimestamp": startTimestamp,
              "endTimestamp": endTimestamp,
              **__get_constraint_values(args)}
    rows = ch.execute(query=ch_query, params=params)
    return rows
def get_application_activity(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
                             endTimestamp=TimeUTC.now(), **args):
    """Average page/image/request load times, with "<key>Progress" ratios
    against the immediately preceding period of equal length."""
    with ch_client.ClickHouseClient() as ch:
        row = __get_application_activity(ch, project_id, startTimestamp, endTimestamp, **args)
        results = helper.dict_to_camel_case(row)
        # Shift the whole window one period back for the progress delta.
        diff = endTimestamp - startTimestamp
        endTimestamp = startTimestamp
        startTimestamp = endTimestamp - diff
        row = __get_application_activity(ch, project_id, startTimestamp, endTimestamp, **args)
        previous = helper.dict_to_camel_case(row)
        for key in previous.keys():
            results[key + "Progress"] = helper.__progress(old_val=previous[key], new_val=results[key])
    return results


def __get_application_activity(ch, project_id, startTimestamp, endTimestamp, **args):
    """Collect avg_page_load_time (LOCATION events) and avg image/request
    durations (resources table) into one dict; None values are zeroed."""
    result = {}
    ch_sub_query = __get_basic_constraints(table_name="pages", data=args)
    ch_sub_query.append("pages.event_type='LOCATION'")
    meta_condition = __get_meta_constraint(args)
    ch_sub_query += meta_condition

    ch_query = f"""SELECT COALESCE(avgOrNull(pages.load_event_end),0) AS avg_page_load_time
                   FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
                   WHERE {" AND ".join(ch_sub_query)} AND pages.load_event_end>0;"""
    params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
              **__get_constraint_values(args)}
    row = ch.execute(query=ch_query, params=params)[0]
    result = {**result, **row}

    # Rebuild the constraints for the resources table; same query is then run
    # twice, parameterised by resource type ('img' / 'fetch').
    ch_sub_query = __get_basic_constraints(table_name="resources", data=args)
    # ch_sub_query.append("events.event_type='RESOURCE'")
    meta_condition = __get_meta_constraint(args)
    ch_sub_query += meta_condition
    ch_sub_query.append("resources.type= %(type)s")
    ch_query = f"""SELECT COALESCE(avgOrNull(resources.duration),0) AS avg
                   FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
                   WHERE {" AND ".join(ch_sub_query)} AND resources.duration>0;"""
    row = ch.execute(query=ch_query,
                     params={"project_id": project_id, "type": 'img', "startTimestamp": startTimestamp,
                             "endTimestamp": endTimestamp, **__get_constraint_values(args)})[0]
    result = {**result, "avg_image_load_time": row["avg"]}
    row = ch.execute(query=ch_query,
                     params={"project_id": project_id, "type": 'fetch', "startTimestamp": startTimestamp,
                             "endTimestamp": endTimestamp, **__get_constraint_values(args)})[0]
    result = {**result, "avg_request_load_time": row["avg"]}

    # Defensive: COALESCE should already prevent NULLs, but zero them anyway.
    for k in result:
        if result[k] is None:
            result[k] = 0
    return result
def get_user_activity(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
                      endTimestamp=TimeUTC.now(), **args):
    """Average visited pages and session duration, with "<key>Progress"
    ratios against the immediately preceding period of equal length."""
    results = {}

    with ch_client.ClickHouseClient() as ch:
        rows = __get_user_activity(ch, project_id, startTimestamp, endTimestamp, **args)
        if len(rows) > 0:
            results = helper.dict_to_camel_case(rows[0])
            # NaN can come back from the averages; normalise to 0.
            for key in results:
                if isnan(results[key]):
                    results[key] = 0
        # Shift the whole window one period back for the progress delta.
        diff = endTimestamp - startTimestamp
        endTimestamp = startTimestamp
        startTimestamp = endTimestamp - diff
        rows = __get_user_activity(ch, project_id, startTimestamp, endTimestamp, **args)

        if len(rows) > 0:
            previous = helper.dict_to_camel_case(rows[0])
            for key in previous:
                results[key + "Progress"] = helper.__progress(old_val=previous[key], new_val=results[key])
    return results


def __get_user_activity(ch, project_id, startTimestamp, endTimestamp, **args):
    """Single-row aggregate of pages_count and duration over sessions that
    have at least one page or a non-zero duration."""
    ch_sub_query = __get_basic_constraints(table_name="sessions", data=args)
    meta_condition = __get_meta_constraint(args)
    ch_sub_query += meta_condition
    ch_sub_query.append("(sessions.pages_count>0 OR sessions.duration>0)")
    ch_query = f"""SELECT COALESCE(CEIL(avgOrNull(NULLIF(sessions.pages_count,0))),0) AS avg_visited_pages,
                          COALESCE(avgOrNull(NULLIF(sessions.duration,0)),0) AS avg_session_duration
                   FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions
                   WHERE {" AND ".join(ch_sub_query)};"""
    params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
              **__get_constraint_values(args)}

    rows = ch.execute(query=ch_query, params=params)

    return rows
def get_slowest_images(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
                       endTimestamp=TimeUTC.now(),
                       density=7, **args):
    """Top-10 slowest image resources by average duration, each with a
    per-bucket average-duration chart; result sorted by session count."""
    step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density)
    ch_sub_query = __get_basic_constraints(table_name="resources", data=args)
    # ch_sub_query.append("events.event_type='RESOURCE'")
    ch_sub_query.append("resources.type = 'img'")
    ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args)
    # ch_sub_query_chart.append("events.event_type='RESOURCE'")
    ch_sub_query_chart.append("resources.type = 'img'")
    ch_sub_query_chart.append("resources.url IN %(url)s")
    meta_condition = __get_meta_constraint(args)
    ch_sub_query += meta_condition
    ch_sub_query_chart += meta_condition

    with ch_client.ClickHouseClient() as ch:
        ch_query = f"""SELECT resources.url,
                              COALESCE(avgOrNull(resources.duration),0) AS avg,
                              COUNT(1) AS count
                       FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
                       WHERE {" AND ".join(ch_sub_query)} AND resources.duration>0
                       GROUP BY resources.url ORDER BY avg DESC LIMIT 10;"""
        params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
        rows = ch.execute(query=ch_query, params=params)

        rows = [{"url": i["url"], "avgDuration": i["avg"], "sessions": i["count"]} for i in rows]
        if len(rows) == 0:
            return []
        urls = [row["url"] for row in rows]

        # One chart query for all top URLs; results are split per URL below
        # (rows come back ordered by url, so a simple scan suffices).
        charts = {}
        ch_query = f"""SELECT url,
                              toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
                              COALESCE(avgOrNull(resources.duration),0) AS avg
                       FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
                       WHERE {" AND ".join(ch_sub_query_chart)} AND resources.duration>0
                       GROUP BY url, timestamp
                       ORDER BY url, timestamp;"""
        params["url"] = urls
        # print(ch.format(query=ch_query, params=params))
        u_rows = ch.execute(query=ch_query, params=params)
        for url in urls:
            sub_rows = []
            for r in u_rows:
                if r["url"] == url:
                    sub_rows.append(r)
                elif len(sub_rows) > 0:
                    # Rows are sorted by url, so once we leave the run we can stop.
                    break
            charts[url] = [{"timestamp": int(i["timestamp"]),
                            "avgDuration": i["avg"]}
                           for i in __complete_missing_steps(rows=sub_rows, start_time=startTimestamp,
                                                             end_time=endTimestamp,
                                                             density=density, neutral={"avg": 0})]
        for i in range(len(rows)):
            rows[i] = helper.dict_to_camel_case(rows[i])
            rows[i]["chart"] = helper.list_to_camel_case(charts[rows[i]["url"]])

    return sorted(rows, key=lambda k: k["sessions"], reverse=True)
def __get_performance_constraint(l):
    """Join pre-rendered (bytes) constraint fragments into an "AND (a OR b)"
    clause; '%' is escaped for the driver's format step."""
    if len(l) == 0:
        return ""
    l = [s.decode('UTF-8').replace("%", "%%") for s in l]
    return f"AND ({' OR '.join(l)})"


def get_performance(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(),
                    density=19, resources=None, **args):
    """Performance widget: per-bucket average image / request / page load
    times merged into one chart, optionally narrowed to specific resources.

    :param resources: optional list of {"type": "IMG"|"LOCATION"|..., "value": url}
                      filters; anything not IMG/LOCATION is treated as a request.
    """
    step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density)
    location_constraints = []
    img_constraints = []
    request_constraints = []
    ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args)
    # ch_sub_query_chart.append("event_type='RESOURCE'")
    meta_condition = __get_meta_constraint(args)
    ch_sub_query_chart += meta_condition

    img_constraints_vals = {}
    location_constraints_vals = {}
    request_constraints_vals = {}

    if resources and len(resources) > 0:
        for r in resources:
            if r["type"] == "IMG":
                img_constraints.append(f"resources.url = %(val_{len(img_constraints)})s")
                img_constraints_vals["val_" + str(len(img_constraints) - 1)] = r['value']
            elif r["type"] == "LOCATION":
                location_constraints.append(f"pages.url_path = %(val_{len(location_constraints)})s")
                location_constraints_vals["val_" + str(len(location_constraints) - 1)] = r['value']
            else:
                request_constraints.append(f"resources.url = %(val_{len(request_constraints)})s")
                request_constraints_vals["val_" + str(len(request_constraints) - 1)] = r['value']
    params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
              "endTimestamp": endTimestamp}
    with ch_client.ClickHouseClient() as ch:
        # Image load times.
        ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
                              COALESCE(avgOrNull(resources.duration),0) AS avg
                       FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
                       WHERE {" AND ".join(ch_sub_query_chart)}
                             AND resources.type = 'img' AND resources.duration>0
                             {(f' AND ({" OR ".join(img_constraints)})') if len(img_constraints) > 0 else ""}
                       GROUP BY timestamp
                       ORDER BY timestamp;"""
        rows = ch.execute(query=ch_query, params={**params, **img_constraints_vals, **__get_constraint_values(args)})
        images = [{"timestamp": i["timestamp"], "avgImageLoadTime": i["avg"]} for i in
                  __complete_missing_steps(rows=rows, start_time=startTimestamp,
                                           end_time=endTimestamp,
                                           density=density, neutral={"avg": 0})]
        # Request (fetch) load times.
        ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
                              COALESCE(avgOrNull(resources.duration),0) AS avg
                       FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
                       WHERE {" AND ".join(ch_sub_query_chart)}
                             AND resources.type = 'fetch' AND resources.duration>0
                             {(f' AND ({" OR ".join(request_constraints)})') if len(request_constraints) > 0 else ""}
                       GROUP BY timestamp
                       ORDER BY timestamp;"""
        rows = ch.execute(query=ch_query,
                          params={**params, **request_constraints_vals, **__get_constraint_values(args)})
        requests = [{"timestamp": i["timestamp"], "avgRequestLoadTime": i["avg"]} for i in
                    __complete_missing_steps(rows=rows, start_time=startTimestamp,
                                             end_time=endTimestamp, density=density,
                                             neutral={"avg": 0})]
        # Page load times come from LOCATION events, so rebuild the constraints.
        ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args)
        ch_sub_query_chart.append("pages.event_type='LOCATION'")
        ch_sub_query_chart += meta_condition

        ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
                              COALESCE(avgOrNull(pages.load_event_end),0) AS avg
                       FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
                       WHERE {" AND ".join(ch_sub_query_chart)} AND pages.load_event_end>0
                             {(f' AND ({" OR ".join(location_constraints)})') if len(location_constraints) > 0 else ""}
                       GROUP BY timestamp
                       ORDER BY timestamp;"""

        rows = ch.execute(query=ch_query,
                          params={**params, **location_constraints_vals, **__get_constraint_values(args)})
        pages = [{"timestamp": i["timestamp"], "avgPageLoadTime": i["avg"]} for i in
                 __complete_missing_steps(rows=rows, start_time=startTimestamp,
                                          end_time=endTimestamp,
                                          density=density, neutral={"avg": 0})]

        # Merge the three series on timestamp and zero any missing values.
        rows = helper.merge_lists_by_key(helper.merge_lists_by_key(pages, requests, "timestamp"), images, "timestamp")

        for s in rows:
            for k in s:
                if s[k] is None:
                    s[k] = 0
    return {"chart": helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp,
                                                                        end_time=endTimestamp,
                                                                        density=density,
                                                                        neutral={"avgImageLoadTime": 0,
                                                                                 "avgRequestLoadTime": 0,
                                                                                 "avgPageLoadTime": 0}))}


# DB resource-type value -> public resource-type name.
# NOTE(review): "RESOURCS" typo is kept — the name may be referenced elsewhere.
RESOURCS_TYPE_TO_DB_TYPE = {
    "img": "IMG",
    "fetch": "REQUEST",
    "stylesheet": "STYLESHEET",
    "script": "SCRIPT",
    "other": "OTHER",
    "media": "MEDIA"
}


def __get_resource_type_from_db_type(db_type):
    # Unknown types pass through unchanged (lower-cased).
    db_type = db_type.lower()
    return RESOURCS_TYPE_TO_DB_TYPE.get(db_type, db_type)
def __get_resource_db_type_from_type(resource_type):
    """Inverse of __get_resource_type_from_db_type: public name -> DB value;
    unknown types pass through unchanged (upper-cased)."""
    resource_type = resource_type.upper()
    return {v: k for k, v in RESOURCS_TYPE_TO_DB_TYPE.items()}.get(resource_type, resource_type)


def search(text, resource_type, project_id, performance=False, pages_only=False, events_only=False,
           metadata=False, key=None, platform=None):
    """Autocomplete search across resources / events / metadata.

    :param text: search text; a leading "^" (starts-with marker) is stripped.
    :param resource_type: None/"" (auto-detected from the flags), "ALL", a
        resource type (IMG/REQUEST/...), LOCATION, INPUT, CLICK or METADATA.
    :return: list of camelCased {"value","type"} dicts (empty list for an
        unknown resource_type).
    """
    if text.startswith("^"):
        text = text[1:]
    if not resource_type:
        # No explicit type: infer one from the flags and recurse once.
        data = []
        if metadata:
            resource_type = "METADATA"
        elif pages_only or performance:
            resource_type = "LOCATION"
        else:
            resource_type = "ALL"
        data.extend(search(text=text, resource_type=resource_type, project_id=project_id,
                           performance=performance, pages_only=pages_only, events_only=events_only, key=key,
                           platform=platform))
        return data

    ch_sub_query = __get_basic_constraints(time_constraint=False,
                                           data={} if platform is None else {"platform": platform})

    if resource_type == "ALL" and not pages_only and not events_only:
        ch_sub_query.append("positionUTF8(url_path,%(value)s)!=0")
        with ch_client.ClickHouseClient() as ch:
            ch_query = f"""SELECT arrayJoin(arraySlice(arrayReverseSort(arrayDistinct(groupArray(url_path))), 1, 5)) AS value,
                                  type AS key
                           FROM {exp_ch_helper.get_main_resources_table(0)} AS resources
                           WHERE {" AND ".join(ch_sub_query)}
                           GROUP BY type
                           ORDER BY type ASC;"""
            # print(ch.format(query=ch_query,
            #                 params={"project_id": project_id,
            #                         "value": text}))
            rows = ch.execute(query=ch_query,
                              params={"project_id": project_id,
                                      "value": text})
            rows = [{"value": i["value"], "type": __get_resource_type_from_db_type(i["key"])} for i in rows]
    elif resource_type == "ALL" and events_only:
        with ch_client.ClickHouseClient() as ch:
            ch_query = f"""SELECT DISTINCT value AS value, type AS key
                           FROM {exp_ch_helper.get_autocomplete_table(0)} autocomplete
                           WHERE {" AND ".join(ch_sub_query)}
                                 AND positionUTF8(lowerUTF8(value), %(value)s) != 0
                                 AND type IN ('LOCATION','INPUT','CLICK')
                           ORDER BY type, value
                           LIMIT 10 BY type;"""
            rows = ch.execute(query=ch_query,
                              params={"project_id": project_id,
                                      "value": text.lower(),
                                      "platform_0": platform})
            rows = [{"value": i["value"], "type": i["key"]} for i in rows]
    elif resource_type in ['IMG', 'REQUEST', 'STYLESHEET', 'OTHER', 'SCRIPT'] and not pages_only:
        ch_sub_query.append("positionUTF8(url_path,%(value)s)!=0")
        ch_sub_query.append(f"resources.type = '{__get_resource_db_type_from_type(resource_type)}'")

        with ch_client.ClickHouseClient() as ch:
            ch_query = f"""SELECT DISTINCT url_path AS value,
                                  %(resource_type)s AS key
                           FROM {exp_ch_helper.get_main_resources_table(0)} AS resources
                           WHERE {" AND ".join(ch_sub_query)}
                           LIMIT 10;"""
            rows = ch.execute(query=ch_query,
                              params={"project_id": project_id,
                                      "value": text,
                                      "resource_type": resource_type,
                                      "platform_0": platform})
            rows = [{"value": i["value"], "type": i["key"]} for i in rows]
    elif resource_type == 'LOCATION':
        with ch_client.ClickHouseClient() as ch:
            ch_sub_query.append("type='LOCATION'")
            ch_sub_query.append("positionUTF8(value,%(value)s)!=0")
            ch_query = f"""SELECT DISTINCT value AS value,
                                  'LOCATION' AS key
                           FROM {exp_ch_helper.get_autocomplete_table(0)} AS autocomplete
                           WHERE {" AND ".join(ch_sub_query)}
                           LIMIT 10;"""
            rows = ch.execute(query=ch_query,
                              params={"project_id": project_id,
                                      "value": text,
                                      "platform_0": platform})
            rows = [{"value": i["value"], "type": i["key"]} for i in rows]
    elif resource_type == "INPUT":
        with ch_client.ClickHouseClient() as ch:
            ch_sub_query.append("positionUTF8(lowerUTF8(value), %(value)s) != 0")
            # FIX: the type literal was missing its closing quote ("type='INPUT"),
            # which produced invalid SQL.
            ch_sub_query.append("type='INPUT'")
            # NOTE(review): this branch filters on `value` but selects `label` —
            # confirm the autocomplete table has a `label` column for INPUT rows.
            ch_query = f"""SELECT DISTINCT label AS value, 'INPUT' AS key
                           FROM {exp_ch_helper.get_autocomplete_table(0)} AS autocomplete
                           WHERE {" AND ".join(ch_sub_query)}
                           LIMIT 10;"""
            rows = ch.execute(query=ch_query,
                              params={"project_id": project_id,
                                      "value": text.lower(),
                                      "platform_0": platform})
            rows = [{"value": i["value"], "type": i["key"]} for i in rows]
    elif resource_type == "CLICK":
        with ch_client.ClickHouseClient() as ch:
            ch_sub_query.append("positionUTF8(lowerUTF8(value), %(value)s) != 0")
            ch_sub_query.append("type='CLICK'")
            ch_query = f"""SELECT DISTINCT value AS value, 'CLICK' AS key
                           FROM {exp_ch_helper.get_autocomplete_table(0)} AS autocomplete
                           WHERE {" AND ".join(ch_sub_query)}
                           LIMIT 10;"""
            rows = ch.execute(query=ch_query,
                              params={"project_id": project_id,
                                      "value": text.lower(),
                                      "platform_0": platform})
            rows = [{"value": i["value"], "type": i["key"]} for i in rows]
    elif resource_type == "METADATA":
        if key and len(key) > 0 and key in {**METADATA_FIELDS, **SESSIONS_META_FIELDS}.keys():
            if key in METADATA_FIELDS.keys():
                ch_sub_query.append(
                    f"positionCaseInsensitiveUTF8(sessions.{METADATA_FIELDS[key]},%(value)s)!=0")

                with ch_client.ClickHouseClient() as ch:
                    ch_query = f"""SELECT DISTINCT sessions.{METADATA_FIELDS[key]} AS value,
                                          %(key)s AS key
                                   FROM {exp_ch_helper.get_main_sessions_table(0)} AS sessions
                                   WHERE {" AND ".join(ch_sub_query)}
                                   LIMIT 10;"""
                    rows = ch.execute(query=ch_query,
                                      params={"project_id": project_id, "value": text, "key": key,
                                              "platform_0": platform})
            else:
                ch_sub_query.append(f"positionCaseInsensitiveUTF8(sessions.{SESSIONS_META_FIELDS[key]},%(value)s)>0")

                with ch_client.ClickHouseClient() as ch:
                    ch_query = f"""SELECT DISTINCT sessions.{SESSIONS_META_FIELDS[key]} AS value,
                                          '{key}' AS key
                                   FROM {exp_ch_helper.get_main_sessions_table(0)} AS sessions
                                   WHERE {" AND ".join(ch_sub_query)}
                                   LIMIT 10;"""
                    rows = ch.execute(query=ch_query, params={"project_id": project_id, "value": text, "key": key,
                                                              "platform_0": platform})
        else:
            with ch_client.ClickHouseClient() as ch:
                # FIX: project_id was hard-coded to 2460 (debug leftover),
                # leaking another project's autocomplete values; bind the
                # caller's project_id instead.
                ch_query = f"""SELECT DISTINCT value AS value,
                                      type AS key
                               FROM {exp_ch_helper.get_autocomplete_table(0)} AS autocomplete
                               WHERE project_id = toUInt16(%(project_id)s)
                                     AND positionCaseInsensitiveUTF8(value, %(value)s) != 0
                               LIMIT 10 BY type"""

                # print(ch.format(query=ch_query, params={"project_id": project_id, "value": text, "key": key,
                #                                         "platform_0": platform}))
                rows = ch.execute(query=ch_query, params={"project_id": project_id, "value": text, "key": key,
                                                          "platform_0": platform})
    else:
        return []
    return [helper.dict_to_camel_case(row) for row in rows]


def get_missing_resources_trend(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
                                endTimestamp=TimeUTC.now(),
                                density=7, **args):
    """Top-10 missing (success=0) image resources with per-bucket count charts."""
    step_size = __get_step_size(startTimestamp, endTimestamp, density)
    ch_sub_query = __get_basic_constraints(table_name="resources", data=args)
    ch_sub_query.append("resources.success = 0")
    ch_sub_query.append("resources.type = 'img'")
    meta_condition = __get_meta_constraint(args)
    ch_sub_query += meta_condition

    with ch_client.ClickHouseClient() as ch:
        ch_query = f"""SELECT resources.url_path AS key,
                              COUNT(1) AS doc_count
                       FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
                       WHERE {" AND ".join(ch_sub_query)}
                       GROUP BY url_path
                       ORDER BY doc_count DESC
                       LIMIT 10;"""
        params = {"project_id": project_id, "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
        # print(ch.format(query=ch_query, params=params))
        rows = ch.execute(query=ch_query, params=params)

        rows = [{"url": i["key"], "sessions": i["doc_count"]} for i in rows]
        if len(rows) == 0:
            return []
        # FIX: the query selects FROM ... AS resources; the old "events." alias
        # made this constraint reference a non-existent table.
        ch_sub_query.append("resources.url_path = %(value)s")
        ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
                              COUNT(1) AS doc_count,
                              toUnixTimestamp(MAX(resources.datetime))*1000 AS max_datatime
                       FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
                       WHERE {" AND ".join(ch_sub_query)}
                       GROUP BY timestamp
                       ORDER BY timestamp;"""
        for e in rows:
            e["startedAt"] = startTimestamp
            e["startTimestamp"] = startTimestamp
            e["endTimestamp"] = endTimestamp
            params["value"] = e["url"]
            r = ch.execute(query=ch_query, params=params)

            # FIX: guard against an empty chart result instead of r[-1] raising.
            e["endedAt"] = r[-1]["max_datatime"] if len(r) > 0 else None
            e["chart"] = [{"timestamp": i["timestamp"], "count": i["doc_count"]} for i in
                          __complete_missing_steps(rows=r, start_time=startTimestamp,
                                                   end_time=endTimestamp,
                                                   density=density,
                                                   neutral={"doc_count": 0})]
    return rows
def get_network(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
                endTimestamp=TimeUTC.now(),
                density=7, **args):
    """Per-bucket top-10 requested url_paths with their hit counts."""
    step_size = __get_step_size(startTimestamp, endTimestamp, density)
    ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args)
    # ch_sub_query_chart.append("events.event_type='RESOURCE'")
    meta_condition = __get_meta_constraint(args)
    ch_sub_query_chart += meta_condition

    with ch_client.ClickHouseClient() as ch:
        ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
                              resources.url_path, COUNT(1) AS doc_count
                       FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
                       WHERE {" AND ".join(ch_sub_query_chart)}
                       GROUP BY timestamp, resources.url_path
                       ORDER BY timestamp, doc_count DESC
                       LIMIT 10 BY timestamp;"""
        params = {"step_size": step_size, "project_id": project_id,
                  "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
        r = ch.execute(query=ch_query, params=params)

        results = []

        # Rows are ordered by timestamp: group each run of equal timestamps
        # into one {"timestamp", "domains": [...]} entry.
        i = 0
        while i < len(r):
            results.append({"timestamp": r[i]["timestamp"], "domains": []})
            i += 1
            while i < len(r) and r[i]["timestamp"] == results[-1]["timestamp"]:
                results[-1]["domains"].append({r[i]["url_path"]: r[i]["doc_count"]})
                i += 1

    return {"startTimestamp": startTimestamp, "endTimestamp": endTimestamp, "chart": results}


# Allowed dashboard query-string keys and their value transformers.
KEYS = {
    'startTimestamp': args_transformer.int_arg,
    'endTimestamp': args_transformer.int_arg,
    'density': args_transformer.int_arg,
    'performanceDensity': args_transformer.int_arg,
    'platform': args_transformer.string
}


def dashboard_args(params):
    """Extract and type-convert the recognised KEYS from a query-params mapping."""
    args = {}
    if params is not None:
        for key in params.keys():
            if key in KEYS.keys():
                args[key] = KEYS[key](params.get(key))
    return args


def get_resources_loading_time(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
                               endTimestamp=TimeUTC.now(),
                               density=19, type=None, url=None, **args):
    """Per-bucket average resource duration (optionally for one type/url) plus
    the overall average for the whole period."""
    step_size = __get_step_size(startTimestamp, endTimestamp, density)
    ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args)
    if type is not None:
        ch_sub_query_chart.append(f"resources.type = '{__get_resource_db_type_from_type(type)}'")
    if url is not None:
        ch_sub_query_chart.append(f"resources.url = %(value)s")
    meta_condition = __get_meta_constraint(args)
    ch_sub_query_chart += meta_condition
    ch_sub_query_chart.append("resources.duration>0")

    with ch_client.ClickHouseClient() as ch:
        ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
                              COALESCE(avgOrNull(resources.duration),0) AS avg
                       FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
                       WHERE {" AND ".join(ch_sub_query_chart)}
                       GROUP BY timestamp
                       ORDER BY timestamp;"""
        params = {"step_size": step_size, "project_id": project_id,
                  "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp,
                  "value": url, "type": type, **__get_constraint_values(args)}
        rows = ch.execute(query=ch_query, params=params)
        ch_query = f"""SELECT COALESCE(avgOrNull(resources.duration),0) AS avg
                       FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
                       WHERE {" AND ".join(ch_sub_query_chart)};"""
        avg = ch.execute(query=ch_query, params=params)[0]["avg"] if len(rows) > 0 else 0
    return {"avg": avg, "chart": __complete_missing_steps(rows=rows, start_time=startTimestamp,
                                                          end_time=endTimestamp,
                                                          density=density,
                                                          neutral={"avg": 0})}
def get_pages_dom_build_time(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
                             endTimestamp=TimeUTC.now(), density=19, url=None, **args):
    """Per-bucket average DOM building time over LOCATION events (optionally
    for one url_path), plus the whole-period average."""
    step_size = __get_step_size(startTimestamp, endTimestamp, density)
    ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args)
    ch_sub_query_chart.append("pages.event_type='LOCATION'")
    if url is not None:
        ch_sub_query_chart.append(f"pages.url_path = %(value)s")
    ch_sub_query_chart.append("isNotNull(pages.dom_building_time)")
    ch_sub_query_chart.append("pages.dom_building_time>0")
    meta_condition = __get_meta_constraint(args)
    ch_sub_query_chart += meta_condition

    with ch_client.ClickHouseClient() as ch:
        ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
                              COALESCE(avgOrNull(pages.dom_building_time),0) AS value
                       FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
                       WHERE {" AND ".join(ch_sub_query_chart)}
                       GROUP BY timestamp
                       ORDER BY timestamp;"""
        params = {"step_size": step_size, "project_id": project_id,
                  "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp,
                  "value": url, **__get_constraint_values(args)}
        rows = ch.execute(query=ch_query, params=params)
        ch_query = f"""SELECT COALESCE(avgOrNull(pages.dom_building_time),0) AS avg
                       FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
                       WHERE {" AND ".join(ch_sub_query_chart)};"""
        avg = ch.execute(query=ch_query, params=params)[0]["avg"] if len(rows) > 0 else 0

    results = {"value": avg,
               "chart": __complete_missing_steps(rows=rows, start_time=startTimestamp,
                                                 end_time=endTimestamp,
                                                 density=density, neutral={"value": 0})}
    # Attaches the time unit metadata to the payload.
    helper.__time_value(results)
    return results


def get_slowest_resources(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
                          endTimestamp=TimeUTC.now(), type="all", density=19, **args):
    """Top-10 slowest resources (by avg duration, grouped by name) of a given
    type ("all" = everything except fetch), each with a per-bucket chart."""
    step_size = __get_step_size(startTimestamp, endTimestamp, density)
    ch_sub_query = __get_basic_constraints(table_name="resources", data=args)
    ch_sub_query.append("isNotNull(resources.name)")
    ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args)
    meta_condition = __get_meta_constraint(args)
    ch_sub_query += meta_condition
    ch_sub_query_chart += meta_condition

    if type is not None and type.upper() != "ALL":
        sq = f"resources.type = '{__get_resource_db_type_from_type(type.upper())}'"
    else:
        sq = "resources.type != 'fetch'"
    ch_sub_query.append(sq)
    ch_sub_query_chart.append(sq)
    ch_sub_query_chart.append("isNotNull(resources.duration)")
    ch_sub_query_chart.append("resources.duration>0")
    with ch_client.ClickHouseClient() as ch:
        ch_query = f"""SELECT any(url) AS url, any(type) AS type, name,
                              COALESCE(avgOrNull(NULLIF(resources.duration,0)),0) AS avg
                       FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
                       WHERE {" AND ".join(ch_sub_query)}
                       GROUP BY name
                       ORDER BY avg DESC
                       LIMIT 10;"""
        params = {"project_id": project_id,
                  "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp, **__get_constraint_values(args)}
        # print(ch.format(query=ch_query, params=params))
        rows = ch.execute(query=ch_query, params=params)
        if len(rows) == 0:
            return []
        ch_sub_query.append(ch_sub_query_chart[-1])
        results = []
        names = [r["name"] for r in rows]
        # One chart query for all top names; split per name below
        # (rows come back ordered by name, so a simple scan suffices).
        ch_query = f"""SELECT name,
                              toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
                              COALESCE(avgOrNull(resources.duration),0) AS avg
                       FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources
                       WHERE {" AND ".join(ch_sub_query_chart)}
                             AND name IN %(names)s
                       GROUP BY name,timestamp
                       ORDER BY name,timestamp;"""
        params = {"step_size": step_size, "project_id": project_id,
                  "startTimestamp": startTimestamp,
                  "endTimestamp": endTimestamp,
                  "names": names, **__get_constraint_values(args)}
        # print(ch.format(query=ch_query, params=params))
        charts = ch.execute(query=ch_query, params=params)
        for r in rows:
            sub_chart = []
            for c in charts:
                if c["name"] == r["name"]:
                    cc = dict(c)
                    cc.pop("name")
                    sub_chart.append(cc)
                elif len(sub_chart) > 0:
                    # Rows are sorted by name, so once we leave the run we can stop.
                    break
            r["chart"] = __complete_missing_steps(rows=sub_chart, start_time=startTimestamp,
                                                  end_time=endTimestamp,
                                                  density=density, neutral={"avg": 0})
            r["type"] = __get_resource_type_from_db_type(r["type"])
            results.append(r)

    return results
params=params)) + charts = ch.execute(query=ch_query, params=params) + for r in rows: + sub_chart = [] + for c in charts: + if c["name"] == r["name"]: + cc = dict(c) + cc.pop("name") + sub_chart.append(cc) + elif len(sub_chart) > 0: + break + r["chart"] = __complete_missing_steps(rows=sub_chart, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral={"avg": 0}) + r["type"] = __get_resource_type_from_db_type(r["type"]) + results.append(r) + + return results + + +def get_sessions_location(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + ch_sub_query = __get_basic_constraints(table_name="sessions", data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT user_country, COUNT(1) AS count + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query)} + GROUP BY user_country + ORDER BY user_country;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + return {"count": sum(i["count"] for i in rows), "chart": helper.list_to_camel_case(rows)} + + +def get_speed_index_location(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + ch_sub_query.append("isNotNull(pages.speed_index)") + ch_sub_query.append("pages.speed_index>0") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT sessions.user_country, COALESCE(avgOrNull(pages.speed_index),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + INNER JOIN 
{exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions USING (session_id) + WHERE {" AND ".join(ch_sub_query)} + GROUP BY sessions.user_country + ORDER BY value ,sessions.user_country;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(ch.format(query=ch_query, params=params)) + rows = ch.execute(query=ch_query, params=params) + ch_query = f"""SELECT COALESCE(avgOrNull(pages.speed_index),0) AS avg + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)};""" + avg = ch.execute(query=ch_query, params=params)[0]["avg"] if len(rows) > 0 else 0 + return {"value": avg, "chart": helper.list_to_camel_case(rows), "unit": schemas.TemplatePredefinedUnits.millisecond} + + +def get_pages_response_time(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=7, url=None, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + ch_sub_query_chart.append("isNotNull(pages.response_time)") + ch_sub_query_chart.append("pages.response_time>0") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + if url is not None: + ch_sub_query_chart.append(f"url_path = %(value)s") + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COALESCE(avgOrNull(pages.response_time),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + 
"value": url, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + ch_query = f"""SELECT COALESCE(avgOrNull(pages.response_time),0) AS avg + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)};""" + avg = ch.execute(query=ch_query, params=params)[0]["avg"] if len(rows) > 0 else 0 + results = {"value": avg, + "chart": __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral={"value": 0})} + helper.__time_value(results) + return results + + +def get_pages_response_time_distribution(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=20, **args): + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + ch_sub_query.append("isNotNull(pages.response_time)") + ch_sub_query.append("pages.response_time>0") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT pages.response_time AS response_time, + COUNT(1) AS count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)} + GROUP BY response_time + ORDER BY response_time;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + ch_query = f"""SELECT COALESCE(avgOrNull(pages.response_time),0) AS avg + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)};""" + avg = ch.execute(query=ch_query, params=params)[0]["avg"] + quantiles_keys = [50, 90, 95, 99] + ch_query = f"""SELECT quantilesExact({",".join([str(i / 100) for i in quantiles_keys])})(pages.response_time) AS values + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages 
+ WHERE {" AND ".join(ch_sub_query)};""" + quantiles = ch.execute(query=ch_query, params=params) + result = { + "value": avg, + "total": sum(r["count"] for r in rows), + "chart": [], + "percentiles": [{ + "percentile": v, + "responseTime": ( + quantiles[0]["values"][i] if quantiles[0]["values"][i] is not None and not math.isnan( + quantiles[0]["values"][i]) else 0)} for i, v in enumerate(quantiles_keys) + ], + "extremeValues": [{"count": 0}], + "unit": schemas.TemplatePredefinedUnits.millisecond + } + if len(rows) > 0: + rows = helper.list_to_camel_case(rows) + _99 = result["percentiles"][-1]["responseTime"] + extreme_values_first_index = -1 + for i, r in enumerate(rows): + if r["responseTime"] > _99: + extreme_values_first_index = i + break + + if extreme_values_first_index >= 0: + extreme_values_first_index += 1 + result["extremeValues"][0]["count"] = sum(r["count"] for r in rows[extreme_values_first_index:]) + # result["extremeValues"][0]["responseTime"] = rows[extreme_values_first_index]["responseTime"] + + rows = rows[:extreme_values_first_index] + + # ------- Merge points to reduce chart length till density + if density < len(quantiles_keys): + density = len(quantiles_keys) + + while len(rows) > density: + true_length = len(rows) + rows_partitions = [] + offset = 0 + for p in result["percentiles"]: + rows_partitions.append([]) + for r in rows[offset:]: + if r["responseTime"] < p["responseTime"]: + rows_partitions[-1].append(r) + offset += 1 + else: + break + rows_partitions.append(rows[offset:]) + # print(f"len rows partition: {len(rows_partitions)}") + # for r in rows_partitions: + # print(f"{r[0]} => {sum(v['count'] for v in r)}") + + largest_partition = 0 + for i in range(len(rows_partitions)): + if len(rows_partitions[i]) > len(rows_partitions[largest_partition]): + largest_partition = i + # print(f"largest partition: {len(rows_partitions[largest_partition])}") + + if len(rows_partitions[largest_partition]) <= 2: + break + # computing lowest merge diff + 
diff = rows[-1]["responseTime"] + for i in range(1, len(rows_partitions[largest_partition]) - 1, 1): + v1 = rows_partitions[largest_partition][i] + v2 = rows_partitions[largest_partition][i + 1] + if (v2["responseTime"] - v1["responseTime"]) < diff: + diff = v2["responseTime"] - v1["responseTime"] + # print(f"lowest merge diff: {diff}") + i = 1 + while i < len(rows_partitions[largest_partition]) - 1 and true_length > density - 1: + v1 = rows_partitions[largest_partition][i] + v2 = rows_partitions[largest_partition][i + 1] + if (v2["responseTime"] - v1["responseTime"]) == diff: + rows_partitions[largest_partition][i]["count"] += v2["count"] + rows_partitions[largest_partition][i]["responseTime"] = v2["responseTime"] + del rows_partitions[largest_partition][i + 1] + true_length -= 1 + else: + i += 1 + + rows = [r for rp in rows_partitions for r in rp] + + if extreme_values_first_index == len(rows): + rows.append({"count": 0, "responseTime": rows[-1]["responseTime"] + 10}) + + result["chart"] = rows + + return result + + +def get_busiest_time_of_day(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + ch_sub_query = __get_basic_constraints(table_name="sessions", data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT intDiv(toHour(sessions.datetime),2)*2 AS hour, + COUNT(1) AS count + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query)} + GROUP BY hour + ORDER BY hour ASC;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + return __complete_missing_steps(rows=rows, start_time=0, end_time=24000, density=12, + neutral={"count": 0}, + time_key="hour", time_coefficient=1) + + +def get_top_metrics(project_id, 
startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), value=None, **args): + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + if value is not None: + ch_sub_query.append("pages.url_path = %(value)s") + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT COALESCE(avgOrNull(if(pages.response_time>0,pages.response_time,null)),0) AS avg_response_time, + COALESCE(avgOrNull(if(pages.first_paint>0,pages.first_paint,null)),0) AS avg_first_paint, + COALESCE(avgOrNull(if(pages.dom_content_loaded_event_time>0,pages.dom_content_loaded_event_time,null)),0) AS avg_dom_content_loaded, + COALESCE(avgOrNull(if(pages.ttfb>0,pages.ttfb,null)),0) AS avg_till_first_bit, + COALESCE(avgOrNull(if(pages.time_to_interactive>0,pages.time_to_interactive,null)),0) AS avg_time_to_interactive, + (SELECT COUNT(1) FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages WHERE {" AND ".join(ch_sub_query)}) AS count_requests + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)} + AND (isNotNull(pages.response_time) AND pages.response_time>0 OR + isNotNull(pages.first_paint) AND pages.first_paint>0 OR + isNotNull(pages.dom_content_loaded_event_time) AND pages.dom_content_loaded_event_time>0 OR + isNotNull(pages.ttfb) AND pages.ttfb>0 OR + isNotNull(pages.time_to_interactive) AND pages.time_to_interactive >0);""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "value": value, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + return helper.dict_to_camel_case(rows[0]) + + +def get_time_to_render(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=7, url=None, **args): + step_size = __get_step_size(startTimestamp, 
endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + ch_sub_query_chart.append("isNotNull(pages.visually_complete)") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + if url is not None: + ch_sub_query_chart.append("pages.url_path = %(value)s") + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COALESCE(avgOrNull(pages.visually_complete),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, "value": url, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + ch_query = f"""SELECT COALESCE(avgOrNull(pages.visually_complete),0) AS avg + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)};""" + avg = ch.execute(query=ch_query, params=params)[0]["avg"] if len(rows) > 0 else 0 + results = {"value": avg, "chart": __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, density=density, + neutral={"value": 0})} + helper.__time_value(results) + return results + + +def get_impacted_sessions_by_slow_pages(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), value=None, density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + ch_sub_query.append("isNotNull(pages.response_time)") + 
ch_sub_query.append("pages.response_time>0") + sch_sub_query = ch_sub_query[:] + if value is not None: + ch_sub_query.append("pages.url_path = %(value)s") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COUNT(DISTINCT pages.session_id) AS count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)} + AND (pages.response_time)>(SELECT COALESCE(avgOrNull(pages.response_time),0) + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(sch_sub_query)})*2 + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, + params={"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "value": value, **__get_constraint_values(args)}) + return __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, density=density, + neutral={"count": 0}) + + +def get_memory_consumption(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="performance", round_start=True, + data=args) + ch_sub_query_chart.append("performance.event_type='PERFORMANCE'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(performance.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COALESCE(avgOrNull(performance.avg_used_js_heap_size),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS performance + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp 
+ ORDER BY timestamp ASC;""" + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + ch_query = f"""SELECT COALESCE(avgOrNull(performance.avg_used_js_heap_size),0) AS avg + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS performance + WHERE {" AND ".join(ch_sub_query_chart)};""" + avg = ch.execute(query=ch_query, params=params)[0]["avg"] if len(rows) > 0 else 0 + return {"value": avg, + "chart": helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"value": 0})), + "unit": schemas.TemplatePredefinedUnits.memory} + + +def get_avg_cpu(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="performance", round_start=True, + data=args) + ch_sub_query_chart.append("performance.event_type='PERFORMANCE'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(performance.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COALESCE(avgOrNull(performance.avg_cpu),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS performance + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp ASC;""" + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + ch_query = f"""SELECT COALESCE(avgOrNull(performance.avg_cpu),0) AS avg + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS 
performance + WHERE {" AND ".join(ch_sub_query_chart)};""" + avg = ch.execute(query=ch_query, params=params)[0]["avg"] if len(rows) > 0 else 0 + return {"value": avg, + "chart": helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"value": 0})), + "unit": schemas.TemplatePredefinedUnits.percentage} + + +def get_avg_fps(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="performance", round_start=True, + data=args) + ch_sub_query_chart.append("performance.event_type='PERFORMANCE'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(performance.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COALESCE(avgOrNull(performance.avg_fps),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS performance + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp ASC;""" + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + ch_query = f"""SELECT COALESCE(avgOrNull(performance.avg_fps),0) AS avg + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS performance + WHERE {" AND ".join(ch_sub_query_chart)};""" + avg = ch.execute(query=ch_query, params=params)[0]["avg"] if len(rows) > 0 else 0 + return {"value": avg, + "chart": helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"value": 0})), + "unit": schemas.TemplatePredefinedUnits.frame} + + +def 
get_crashes(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query = __get_basic_constraints(table_name="sessions", round_start=True, data=args) + ch_sub_query.append("has(sessions.issue_types,'crash')") + ch_sub_query_chart = __get_basic_constraints(table_name="sessions", round_start=True, + data=args) + ch_sub_query_chart.append("has(sessions.issue_types,'crash')") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + ch_sub_query_chart += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COUNT(1) AS value + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + if len(rows) == 0: + browsers = [] + else: + ch_query = f"""SELECT b.user_browser AS browser, + sum(bv.count) AS total, + groupArray([bv.user_browser_version, toString(bv.count)]) AS versions + FROM ( + SELECT sessions.user_browser + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query)} + GROUP BY sessions.user_browser + ORDER BY COUNT(1) DESC + LIMIT 3 + ) AS b + INNER JOIN + ( + SELECT sessions.user_browser, + sessions.user_browser_version, + COUNT(1) AS count + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query)} + GROUP BY sessions.user_browser, + sessions.user_browser_version + ORDER BY count DESC + ) AS bv USING (user_browser) + GROUP BY b.user_browser + ORDER BY 
b.user_browser;""" + browsers = ch.execute(query=ch_query, params=params) + total = sum(r["total"] for r in browsers) + for r in browsers: + r["percentage"] = r["total"] / (total / 100) + versions = [] + for i in range(len(r["versions"][:3])): + versions.append({r["versions"][i][0]: int(r["versions"][i][1]) / (r["total"] / 100)}) + r["versions"] = versions + + result = {"chart": __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"value": 0}), + "browsers": browsers, + "unit": schemas.TemplatePredefinedUnits.count} + return result + + +def __get_domains_errors_neutral(rows): + neutral = {l: 0 for l in [i for k in [list(v.keys()) for v in rows] for i in k]} + if len(neutral.keys()) == 0: + neutral = {"All": 0} + return neutral + + +def __merge_rows_with_neutral(rows, neutral): + for i in range(len(rows)): + rows[i] = {**neutral, **rows[i]} + return rows + + +def get_domains_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=6, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query = __get_basic_constraints(table_name="requests", round_start=True, data=args) + ch_sub_query.append("requests.event_type='REQUEST'") + ch_sub_query.append("intDiv(requests.status, 100) == %(status_code)s") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT timestamp, + groupArray([domain, toString(count)]) AS keys + FROM (SELECT toUnixTimestamp(toStartOfInterval(requests.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + requests.url_host AS domain, COUNT(1) AS count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests + WHERE {" AND ".join(ch_sub_query)} + GROUP BY timestamp,requests.url_host + ORDER BY timestamp, count DESC + LIMIT 5 BY timestamp) AS domain_stats + GROUP BY timestamp;""" + params = 
{"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "step_size": step_size, + "status_code": 4, **__get_constraint_values(args)} + # print(ch.format(query=ch_query, params=params)) + rows = ch.execute(query=ch_query, params=params) + rows = __nested_array_to_dict_array(rows) + neutral = __get_domains_errors_neutral(rows) + rows = __merge_rows_with_neutral(rows, neutral) + + result = {"4xx": __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral=neutral)} + params["status_code"] = 5 + rows = ch.execute(query=ch_query, params=params) + rows = __nested_array_to_dict_array(rows) + neutral = __get_domains_errors_neutral(rows) + rows = __merge_rows_with_neutral(rows, neutral) + result["5xx"] = __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral=neutral) + return result + + +def __get_domains_errors_4xx_and_5xx(status, project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=6, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query = __get_basic_constraints(table_name="requests", round_start=True, data=args) + ch_sub_query.append("requests.event_type='REQUEST'") + ch_sub_query.append("intDiv(requests.status, 100) == %(status_code)s") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT timestamp, + groupArray([domain, toString(count)]) AS keys + FROM (SELECT toUnixTimestamp(toStartOfInterval(requests.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + requests.url_host AS domain, COUNT(1) AS count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests + WHERE {" AND ".join(ch_sub_query)} + GROUP BY timestamp,requests.url_host + ORDER BY timestamp, count DESC + LIMIT 5 BY timestamp) AS domain_stats + GROUP BY 
timestamp;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "step_size": step_size, + "status_code": status, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + rows = __nested_array_to_dict_array(rows) + neutral = __get_domains_errors_neutral(rows) + rows = __merge_rows_with_neutral(rows, neutral) + + return __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral=neutral) + + +def get_domains_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=6, **args): + return __get_domains_errors_4xx_and_5xx(status=4, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, density=density, **args) + + +def get_domains_errors_5xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=6, **args): + return __get_domains_errors_4xx_and_5xx(status=5, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, density=density, **args) + + +def __nested_array_to_dict_array(rows): + for r in rows: + for i in range(len(r["keys"])): + r[r["keys"][i][0]] = int(r["keys"][i][1]) + r.pop("keys") + return rows + + +def get_slowest_domains(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + ch_sub_query = __get_basic_constraints(table_name="resources", data=args) + ch_sub_query.append("isNotNull(resources.duration)") + ch_sub_query.append("resources.duration>0") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT resources.url_host AS domain, + COALESCE(avgOrNull(resources.duration),0) AS value + FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources + WHERE {" AND ".join(ch_sub_query)} + GROUP BY resources.url_host + ORDER BY value DESC + 
LIMIT 5;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + ch_query = f"""SELECT COALESCE(avgOrNull(resources.duration),0) AS avg + FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources + WHERE {" AND ".join(ch_sub_query)};""" + avg = ch.execute(query=ch_query, params=params)[0]["avg"] if len(rows) > 0 else 0 + return {"value": avg, "chart": rows, "unit": schemas.TemplatePredefinedUnits.millisecond} + + +def get_errors_per_domains(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + ch_sub_query = __get_basic_constraints(table_name="requests", data=args) + ch_sub_query.append("requests.event_type = 'REQUEST'") + ch_sub_query.append("requests.success = 0") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT + requests.url_host AS domain, + COUNT(1) AS errors_count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests + WHERE {" AND ".join(ch_sub_query)} + GROUP BY requests.url_host + ORDER BY errors_count DESC + LIMIT 5;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + return helper.list_to_camel_case(rows) + + +def get_sessions_per_browser(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), + platform=None, **args): + ch_sub_query = __get_basic_constraints(table_name="sessions", data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT b.user_browser AS browser, + b.count, + groupArray([bv.user_browser_version, toString(bv.count)]) AS versions + FROM + ( + SELECT 
sessions.user_browser, + COUNT(1) AS count + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query)} + GROUP BY sessions.user_browser + ORDER BY count DESC + LIMIT 3 + ) AS b + INNER JOIN + ( + SELECT sessions.user_browser, + sessions.user_browser_version, + COUNT(1) AS count + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query)} + GROUP BY + sessions.user_browser, + sessions.user_browser_version + ORDER BY count DESC + LIMIT 3 + ) AS bv USING (user_browser) + GROUP BY + b.user_browser, b.count + ORDER BY b.count DESC;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + for i, r in enumerate(rows): + versions = {} + for j in range(len(r["versions"])): + versions[r["versions"][j][0]] = int(r["versions"][j][1]) + r.pop("versions") + rows[i] = {**r, **versions} + return {"count": sum(i["count"] for i in rows), "chart": rows} + + +def get_calls_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), + platform=None, **args): + ch_sub_query = __get_basic_constraints(table_name="requests", data=args) + ch_sub_query.append("requests.event_type = 'REQUEST'") + ch_sub_query.append("intDiv(requests.status, 100) != 2") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT requests.method, + requests.url_hostpath, + COUNT(1) AS all_requests, + SUM(if(intDiv(requests.status, 100) == 4, 1, 0)) AS _4xx, + SUM(if(intDiv(requests.status, 100) == 5, 1, 0)) AS _5xx + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests + WHERE {" AND ".join(ch_sub_query)} + GROUP BY requests.method, requests.url_hostpath + ORDER BY (_4xx + _5xx) DESC, all_requests DESC + LIMIT 50;""" + params = 
{"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + return helper.list_to_camel_case(rows) + + +def __get_calls_errors_4xx_or_5xx(status, project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), + platform=None, **args): + ch_sub_query = __get_basic_constraints(table_name="requests", data=args) + ch_sub_query.append("requests.event_type = 'REQUEST'") + ch_sub_query.append(f"intDiv(requests.status, 100) == {status}") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT requests.method, + requests.url_hostpath, + COUNT(1) AS all_requests + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests + WHERE {" AND ".join(ch_sub_query)} + GROUP BY requests.method, requests.url_hostpath + ORDER BY all_requests DESC + LIMIT 10;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(ch.format(query=ch_query, params=params)) + rows = ch.execute(query=ch_query, params=params) + return helper.list_to_camel_case(rows) + + +def get_calls_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), + platform=None, **args): + return __get_calls_errors_4xx_or_5xx(status=4, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, + platform=platform, **args) + + +def get_calls_errors_5xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), + platform=None, **args): + return __get_calls_errors_4xx_or_5xx(status=5, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, + platform=platform, **args) + + +def get_errors_per_type(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), + 
platform=None, density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="events", round_start=True, + data=args) + ch_sub_query_chart.append("(events.event_type = 'REQUEST' OR events.event_type = 'ERROR')") + ch_sub_query_chart.append("(events.status>200 OR events.event_type = 'ERROR')") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + SUM(events.event_type = 'REQUEST' AND intDiv(events.status, 100) == 4) AS _4xx, + SUM(events.event_type = 'REQUEST' AND intDiv(events.status, 100) == 5) AS _5xx, + SUM(events.event_type = 'ERROR' AND events.source == 'js_exception') AS js, + SUM(events.event_type = 'ERROR' AND events.source != 'js_exception') AS integrations + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS events + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(ch.format(query=ch_query, params=params)) + rows = ch.execute(query=ch_query, params=params) + rows = helper.list_to_camel_case(rows) + + return __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"4xx": 0, "5xx": 0, "js": 0, "integrations": 0}) + + +def resource_type_vs_response_end(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args) + ch_sub_query_chart_response_end = __get_basic_constraints(table_name="pages", 
round_start=True, + data=args) + ch_sub_query_chart_response_end.append("pages.event_type='LOCATION'") + ch_sub_query_chart_response_end.append("isNotNull(pages.response_end)") + ch_sub_query_chart_response_end.append("pages.response_end>0") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + ch_sub_query_chart_response_end += meta_condition + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COUNT(1) AS total, + SUM(if(resources.type='fetch',1,0)) AS xhr + FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + # print(ch.format(query=ch_query, params=params)) + actions = ch.execute(query=ch_query, params=params) + actions = __complete_missing_steps(rows=actions, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"total": 0, "xhr": 0}) + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COALESCE(avgOrNull(pages.response_end),0) AS avg_response_end + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart_response_end)} + GROUP BY timestamp + ORDER BY timestamp;""" + response_end = ch.execute(query=ch_query, params=params) + response_end = __complete_missing_steps(rows=response_end, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"avg_response_end": 0}) + return helper.list_to_camel_case(__merge_charts(response_end, actions)) + + +def get_impacted_sessions_by_js_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + 
endTimestamp=TimeUTC.now(), density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="errors", round_start=True, data=args) + ch_sub_query_chart.append("errors.event_type='ERROR'") + ch_sub_query_chart.append("errors.source == 'js_exception'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(errors.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COUNT(DISTINCT errors.session_id) AS sessions_count, + COUNT(DISTINCT errors.error_id) AS errors_count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, + params={"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + ch_query = f"""SELECT COUNT(DISTINCT errors.session_id) AS sessions_count, + COUNT(DISTINCT errors.error_id) AS errors_count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors + WHERE {" AND ".join(ch_sub_query_chart)};""" + counts = ch.execute(query=ch_query, + params={"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + return {"sessionsCount": counts[0]["sessions_count"], + "errorsCount": counts[0]["errors_count"], + "chart": helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"sessions_count": 0, + "errors_count": 0}))} + + +# TODO: super slow (try using sampling) +def get_resources_vs_visually_complete(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=7, **args): + 
step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query = __get_basic_constraints(table_name="resources", data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(s.base_datetime, toIntervalSecond(%(step_size)s))) * 1000 AS timestamp, + COALESCE(avgOrNull(NULLIF(s.count,0)),0) AS avg, + groupArray([toString(t.type), toString(t.xavg)]) AS types + FROM + ( SELECT resources.session_id, + MIN(resources.datetime) AS base_datetime, + COUNT(1) AS count + FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources + WHERE {" AND ".join(ch_sub_query)} + GROUP BY resources.session_id + ) AS s + INNER JOIN + (SELECT session_id, + type, + COALESCE(avgOrNull(NULLIF(count,0)),0) AS xavg + FROM (SELECT resources.session_id, resources.type, COUNT(1) AS count + FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources + WHERE {" AND ".join(ch_sub_query)} + GROUP BY resources.session_id, resources.type) AS ss + GROUP BY ss.session_id, ss.type) AS t USING (session_id) + GROUP BY timestamp + ORDER BY timestamp ASC;""" + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(">>>>>>>>>>>>>>") + # print(ch.format(query=ch_query, params=params)) + # print(">>>>>>>>>>>>>>") + rows = ch.execute(query=ch_query, params=params) + for r in rows: + types = {} + for i in range(len(r["types"])): + if r["types"][i][0] not in types: + types[r["types"][i][0]] = [] + types[r["types"][i][0]].append(float(r["types"][i][1])) + for i in types: + types[i] = sum(types[i]) / len(types[i]) + r["types"] = types + resources = __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"avg": 0, "types": {}}) + time_to_render = 
get_time_to_render(project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, density=density, + **args) + + return helper.list_to_camel_case( + __merge_charts( + [{"timestamp": i["timestamp"], "avgCountResources": i["avg"], "types": i["types"]} for i in resources], + [{"timestamp": i["timestamp"], "avgTimeToRender": i["value"]} for i in time_to_render["chart"]])) + + +def get_resources_count_by_type(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT timestamp, + groupArray([toString(t.type), toString(t.count)]) AS types + FROM(SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + resources.type, + COUNT(1) AS count + FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp,resources.type + ORDER BY timestamp) AS t + GROUP BY timestamp;""" + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(ch.format(query=ch_query, params=params)) + rows = ch.execute(query=ch_query, params=params) + for r in rows: + for t in r["types"]: + r[t[0]] = t[1] + r.pop("types") + return __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={k: 0 for k in RESOURCS_TYPE_TO_DB_TYPE.keys()}) + + +def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=7, **args): + step_size = __get_step_size(startTimestamp, 
endTimestamp, density) + ch_sub_query = __get_basic_constraints(table_name="requests", round_start=True, data=args) + ch_sub_query.append("requests.event_type='REQUEST'") + ch_sub_query.append("requests.success = 0") + sch_sub_query = ["rs.project_id =toUInt16(%(project_id)s)", "rs.event_type='REQUEST'"] + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + # sch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(sub_requests.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + SUM(first.url_host = sub_requests.url_host) AS first_party, + SUM(first.url_host != sub_requests.url_host) AS third_party + FROM + ( + SELECT requests.datetime, requests.url_host + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests + WHERE {" AND ".join(ch_sub_query)} + ) AS sub_requests + CROSS JOIN + ( + SELECT + rs.url_host, + COUNT(1) AS count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS rs + WHERE {" AND ".join(sch_sub_query)} + GROUP BY rs.url_host + ORDER BY count DESC + LIMIT 1 + ) AS first + GROUP BY timestamp + ORDER BY timestamp;""" + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(ch.format(query=ch_query, params=params)) + rows = ch.execute(query=ch_query, params=params) + return helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"first_party": 0, + "third_party": 0})) + + +def get_application_activity_avg_page_load_time(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + with ch_client.ClickHouseClient() as ch: + row = __get_application_activity_avg_page_load_time(ch, project_id, startTimestamp, endTimestamp, **args) + results = helper.dict_to_camel_case(row) + 
results["chart"] = get_performance_avg_page_load_time(ch, project_id, startTimestamp, endTimestamp, **args) + diff = endTimestamp - startTimestamp + endTimestamp = startTimestamp + startTimestamp = endTimestamp - diff + row = __get_application_activity_avg_page_load_time(ch, project_id, startTimestamp, endTimestamp, **args) + previous = helper.dict_to_camel_case(row) + results["progress"] = helper.__progress(old_val=previous["value"], new_val=results["value"]) + helper.__time_value(results) + return results + + +def __get_application_activity_avg_page_load_time(ch, project_id, startTimestamp, endTimestamp, **args): + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + ch_sub_query.append("pages.load_event_end>0") + ch_query = f"""SELECT COALESCE(avgOrNull(pages.load_event_end),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)};""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, + **__get_constraint_values(args)} + # print(ch.format(query=ch_query, params=params)) + row = ch.execute(query=ch_query, params=params)[0] + result = row + for k in result: + if result[k] is None: + result[k] = 0 + return result + + +def get_performance_avg_page_load_time(ch, project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), + density=19, resources=None, **args): + step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density) + location_constraints = [] + meta_condition = __get_meta_constraint(args) + + location_constraints_vals = {} + + if resources and len(resources) > 0: + for r in resources: + if r["type"] == "LOCATION": + location_constraints.append(f"pages.url_path = %(val_{len(location_constraints)})s") + location_constraints_vals["val_" + 
str(len(location_constraints) - 1)] = r['value'] + + params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp} + + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, + data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + ch_sub_query_chart += meta_condition + ch_sub_query_chart.append("pages.load_event_end>0") + + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, + COALESCE(avgOrNull(pages.load_event_end),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} + {(f' AND ({" OR ".join(location_constraints)})') if len(location_constraints) > 0 else ""} + GROUP BY timestamp + ORDER BY timestamp;""" + + rows = ch.execute(query=ch_query, params={**params, **location_constraints_vals, **__get_constraint_values(args)}) + pages = __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral={"value": 0}) + + # for s in pages: + # for k in s: + # if s[k] is None: + # s[k] = 0 + return pages + + +def get_application_activity_avg_image_load_time(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + with ch_client.ClickHouseClient() as ch: + row = __get_application_activity_avg_image_load_time(ch, project_id, startTimestamp, endTimestamp, **args) + results = helper.dict_to_camel_case(row) + results["chart"] = get_performance_avg_image_load_time(ch, project_id, startTimestamp, endTimestamp, **args) + diff = endTimestamp - startTimestamp + endTimestamp = startTimestamp + startTimestamp = endTimestamp - diff + row = __get_application_activity_avg_image_load_time(ch, project_id, startTimestamp, endTimestamp, **args) + previous = helper.dict_to_camel_case(row) + results["progress"] = helper.__progress(old_val=previous["value"], 
new_val=results["value"]) + helper.__time_value(results) + return results + + +def __get_application_activity_avg_image_load_time(ch, project_id, startTimestamp, endTimestamp, **args): + ch_sub_query = __get_basic_constraints(table_name="resources", data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + ch_sub_query.append("resources.type= %(type)s") + ch_sub_query.append("resources.duration>0") + ch_query = f"""\ + SELECT COALESCE(avgOrNull(resources.duration),0) AS value + FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources + WHERE {" AND ".join(ch_sub_query)};""" + row = ch.execute(query=ch_query, + params={"project_id": project_id, "type": 'img', "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)})[0] + result = row + # for k in result: + # if result[k] is None: + # result[k] = 0 + return result + + +def get_performance_avg_image_load_time(ch, project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), + density=19, resources=None, **args): + step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density) + img_constraints = [] + ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + img_constraints_vals = {} + + if resources and len(resources) > 0: + for r in resources: + if r["type"] == "IMG": + img_constraints.append(f"resources.url = %(val_{len(img_constraints)})s") + img_constraints_vals["val_" + str(len(img_constraints) - 1)] = r['value'] + + params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp} + ch_sub_query_chart.append("resources.duration>0") + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, + 
COALESCE(avgOrNull(resources.duration),0) AS value + FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources + WHERE {" AND ".join(ch_sub_query_chart)} + AND resources.type = 'img' + {(f' AND ({" OR ".join(img_constraints)})') if len(img_constraints) > 0 else ""} + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, params={**params, **img_constraints_vals, **__get_constraint_values(args)}) + images = __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral={"value": 0}) + + # for s in images: + # for k in s: + # if s[k] is None: + # s[k] = 0 + return images + + +def get_application_activity_avg_request_load_time(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + with ch_client.ClickHouseClient() as ch: + row = __get_application_activity_avg_request_load_time(ch, project_id, startTimestamp, endTimestamp, **args) + results = helper.dict_to_camel_case(row) + results["chart"] = get_performance_avg_request_load_time(ch, project_id, startTimestamp, endTimestamp, **args) + diff = endTimestamp - startTimestamp + endTimestamp = startTimestamp + startTimestamp = endTimestamp - diff + row = __get_application_activity_avg_request_load_time(ch, project_id, startTimestamp, endTimestamp, **args) + previous = helper.dict_to_camel_case(row) + results["progress"] = helper.__progress(old_val=previous["value"], new_val=results["value"]) + helper.__time_value(results) + return results + + +def __get_application_activity_avg_request_load_time(ch, project_id, startTimestamp, endTimestamp, **args): + ch_sub_query = __get_basic_constraints(table_name="resources", data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + ch_sub_query.append("resources.type= %(type)s") + ch_sub_query.append("resources.duration>0") + ch_query = f"""SELECT COALESCE(avgOrNull(resources.duration),0) AS value + FROM 
{exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources + WHERE {" AND ".join(ch_sub_query)};""" + row = ch.execute(query=ch_query, + params={"project_id": project_id, "type": 'fetch', "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)})[0] + result = row + # for k in result: + # if result[k] is None: + # result[k] = 0 + return result + + +def get_performance_avg_request_load_time(ch, project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), + density=19, resources=None, **args): + step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density) + request_constraints = [] + ch_sub_query_chart = __get_basic_constraints(table_name="resources", round_start=True, data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + request_constraints_vals = {} + + if resources and len(resources) > 0: + for r in resources: + if r["type"] != "IMG" and r["type"] != "LOCATION": + request_constraints.append(f"resources.url = %(val_{len(request_constraints)})s") + request_constraints_vals["val_" + str(len(request_constraints) - 1)] = r['value'] + params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp} + ch_sub_query_chart.append("resources.duration>0") + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(resources.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, + COALESCE(avgOrNull(resources.duration),0) AS value + FROM {exp_ch_helper.get_main_resources_table(startTimestamp)} AS resources + WHERE {" AND ".join(ch_sub_query_chart)} + AND resources.type = 'fetch' + {(f' AND ({" OR ".join(request_constraints)})') if len(request_constraints) > 0 else ""} + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, + params={**params, **request_constraints_vals, **__get_constraint_values(args)}) + requests = 
__complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, density=density, + neutral={"value": 0}) + + # for s in requests: + # for k in s: + # if s[k] is None: + # s[k] = 0 + return requests + + +def get_page_metrics_avg_dom_content_load_start(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + with ch_client.ClickHouseClient() as ch: + results = {} + rows = __get_page_metrics_avg_dom_content_load_start(ch, project_id, startTimestamp, endTimestamp, **args) + if len(rows) > 0: + results = helper.dict_to_camel_case(rows[0]) + results["chart"] = __get_page_metrics_avg_dom_content_load_start_chart(ch, project_id, startTimestamp, + endTimestamp, **args) + diff = endTimestamp - startTimestamp + endTimestamp = startTimestamp + startTimestamp = endTimestamp - diff + rows = __get_page_metrics_avg_dom_content_load_start(ch, project_id, startTimestamp, endTimestamp, **args) + if len(rows) > 0: + previous = helper.dict_to_camel_case(rows[0]) + results["progress"] = helper.__progress(old_val=previous["value"], new_val=results["value"]) + helper.__time_value(results) + return results + + +def __get_page_metrics_avg_dom_content_load_start(ch, project_id, startTimestamp, endTimestamp, **args): + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + ch_sub_query.append("pages.dom_content_loaded_event_end>0") + ch_query = f"""SELECT COALESCE(avgOrNull(pages.dom_content_loaded_event_end),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)};""" + params = {"project_id": project_id, "type": 'fetch', "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, + **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + return rows + + +def 
__get_page_metrics_avg_dom_content_load_start_chart(ch, project_id, startTimestamp, endTimestamp, density=19, + **args): + step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density) + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp} + ch_sub_query_chart.append("pages.dom_content_loaded_event_end>0") + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, + COALESCE(avgOrNull(pages.dom_content_loaded_event_end),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, params={**params, **__get_constraint_values(args)}) + rows = __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral={"value": 0}) + + # for s in rows: + # for k in s: + # if s[k] is None: + # s[k] = 0 + return rows + + +def get_page_metrics_avg_first_contentful_pixel(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + with ch_client.ClickHouseClient() as ch: + rows = __get_page_metrics_avg_first_contentful_pixel(ch, project_id, startTimestamp, endTimestamp, **args) + if len(rows) > 0: + results = helper.dict_to_camel_case(rows[0]) + results["chart"] = __get_page_metrics_avg_first_contentful_pixel_chart(ch, project_id, startTimestamp, + endTimestamp, **args) + diff = endTimestamp - startTimestamp + endTimestamp = startTimestamp + startTimestamp = endTimestamp - diff + rows = __get_page_metrics_avg_first_contentful_pixel(ch, 
project_id, startTimestamp, endTimestamp, **args) + if len(rows) > 0: + previous = helper.dict_to_camel_case(rows[0]) + results["progress"] = helper.__progress(old_val=previous["value"], new_val=results["value"]) + helper.__time_value(results) + return results + + +def __get_page_metrics_avg_first_contentful_pixel(ch, project_id, startTimestamp, endTimestamp, **args): + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + ch_sub_query.append("pages.first_contentful_paint_time>0") + # changed dom_content_loaded_event_start to dom_content_loaded_event_end + ch_query = f"""\ + SELECT COALESCE(avgOrNull(pages.first_contentful_paint_time),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)};""" + params = {"project_id": project_id, "type": 'fetch', "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, + **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + return rows + + +def __get_page_metrics_avg_first_contentful_pixel_chart(ch, project_id, startTimestamp, endTimestamp, density=20, + **args): + step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density) + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp} + ch_sub_query_chart.append("pages.first_contentful_paint_time>0") + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, + COALESCE(avgOrNull(pages.first_contentful_paint_time),0) AS 
value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, params={**params, **__get_constraint_values(args)}) + rows = __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral={"value": 0}) + return rows + + +def get_user_activity_avg_visited_pages(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + results = {} + + with ch_client.ClickHouseClient() as ch: + rows = __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args) + if len(rows) > 0: + results = helper.dict_to_camel_case(rows[0]) + for key in results: + if isnan(results[key]): + results[key] = 0 + results["chart"] = __get_user_activity_avg_visited_pages_chart(ch, project_id, startTimestamp, + endTimestamp, **args) + + diff = endTimestamp - startTimestamp + endTimestamp = startTimestamp + startTimestamp = endTimestamp - diff + rows = __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args) + + if len(rows) > 0: + previous = helper.dict_to_camel_case(rows[0]) + results["progress"] = helper.__progress(old_val=previous["value"], new_val=results["value"]) + results["unit"] = schemas.TemplatePredefinedUnits.count + return results + + +def __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args): + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + ch_query = f"""SELECT COALESCE(CEIL(avgOrNull(count)),0) AS value + FROM (SELECT COUNT(1) AS count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)} + GROUP BY session_id) AS groupped_data + WHERE count>0;""" + params 
= {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, + **__get_constraint_values(args)} + + rows = ch.execute(query=ch_query, params=params) + + return rows + + +def __get_user_activity_avg_visited_pages_chart(ch, project_id, startTimestamp, endTimestamp, density=20, **args): + step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density) + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + ch_query = f"""SELECT timestamp, COALESCE(avgOrNull(count), 0) AS value + FROM (SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, + session_id, COUNT(1) AS count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp,session_id + ORDER BY timestamp) AS groupped_data + WHERE count>0 + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, params=params) + rows = __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral={"value": 0}) + return rows + + +def get_user_activity_avg_session_duration(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + results = {} + + with ch_client.ClickHouseClient() as ch: + rows = __get_user_activity_avg_session_duration(ch, project_id, startTimestamp, endTimestamp, **args) + if len(rows) > 0: + results = helper.dict_to_camel_case(rows[0]) + for key in results: + if isnan(results[key]): + results[key] = 0 + results["chart"] = 
__get_user_activity_avg_session_duration_chart(ch, project_id, startTimestamp, + endTimestamp, **args) + diff = endTimestamp - startTimestamp + endTimestamp = startTimestamp + startTimestamp = endTimestamp - diff + rows = __get_user_activity_avg_session_duration(ch, project_id, startTimestamp, endTimestamp, **args) + + if len(rows) > 0: + previous = helper.dict_to_camel_case(rows[0]) + results["progress"] = helper.__progress(old_val=previous["value"], new_val=results["value"]) + helper.__time_value(results) + return results + + +def __get_user_activity_avg_session_duration(ch, project_id, startTimestamp, endTimestamp, **args): + ch_sub_query = __get_basic_constraints(table_name="sessions", data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + ch_sub_query.append("isNotNull(sessions.duration)") + ch_sub_query.append("sessions.duration>0") + + ch_query = f"""SELECT COALESCE(avgOrNull(sessions.duration),0) AS value + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query)};""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, + **__get_constraint_values(args)} + + rows = ch.execute(query=ch_query, params=params) + + return rows + + +def __get_user_activity_avg_session_duration_chart(ch, project_id, startTimestamp, endTimestamp, density=20, **args): + step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density) + ch_sub_query_chart = __get_basic_constraints(table_name="sessions", round_start=True, data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + ch_sub_query_chart.append("isNotNull(sessions.duration)") + ch_sub_query_chart.append("sessions.duration>0") + params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp} + + ch_query = f"""SELECT 
toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, + COALESCE(avgOrNull(sessions.duration),0) AS value + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + + rows = ch.execute(query=ch_query, params={**params, **__get_constraint_values(args)}) + rows = __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral={"value": 0}) + return rows + + +def get_top_metrics_avg_response_time(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), value=None, density=20, **args): + step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density) + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + ch_sub_query += meta_condition + + if value is not None: + ch_sub_query.append("pages.url_path = %(value)s") + ch_sub_query_chart.append("pages.url_path = %(value)s") + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT COALESCE(avgOrNull(pages.response_time),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)} AND isNotNull(pages.response_time) AND pages.response_time>0;""" + params = {"step_size": step_size, "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "value": value, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + results = rows[0] + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL 
%(step_size)s second ))*1000 AS timestamp, + COALESCE(avgOrNull(pages.response_time),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} AND isNotNull(pages.response_time) AND pages.response_time>0 + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, params={**params, **__get_constraint_values(args)}) + rows = __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral={"value": 0}) + results["chart"] = rows + helper.__time_value(results) + return helper.dict_to_camel_case(results) + + +def get_top_metrics_count_requests(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), value=None, density=20, **args): + step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density) + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + ch_sub_query += meta_condition + + if value is not None: + ch_sub_query.append("pages.url_path = %(value)s") + ch_sub_query_chart.append("pages.url_path = %(value)s") + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT COUNT(1) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)};""" + params = {"step_size": step_size, "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "value": value, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + result = rows[0] + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second 
))*1000 AS timestamp, + COUNT(1) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, params={**params, **__get_constraint_values(args)}) + rows = __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral={"value": 0}) + result["chart"] = rows + result["unit"] = schemas.TemplatePredefinedUnits.count + return helper.dict_to_camel_case(result) + + +def get_top_metrics_avg_first_paint(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), value=None, density=20, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + ch_sub_query += meta_condition + + if value is not None: + ch_sub_query.append("pages.url_path = %(value)s") + ch_sub_query_chart.append("pages.url_path = %(value)s") + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT COALESCE(avgOrNull(pages.first_paint),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)} AND isNotNull(pages.first_paint) AND pages.first_paint>0;""" + params = {"step_size": step_size, "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "value": value, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + results = rows[0] + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + 
COALESCE(avgOrNull(pages.first_paint),0) AS value
+ FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
+ WHERE {" AND ".join(ch_sub_query_chart)} AND isNotNull(pages.first_paint) AND pages.first_paint>0
+ GROUP BY timestamp
+ ORDER BY timestamp;"""
+ rows = ch.execute(query=ch_query, params=params)
+ results["chart"] = helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp,
+ end_time=endTimestamp,
+ density=density,
+ neutral={"value": 0}))
+
+ helper.__time_value(results)
+ return helper.dict_to_camel_case(results)
+
+
+def get_top_metrics_avg_dom_content_loaded(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
+ endTimestamp=TimeUTC.now(), value=None, density=19, **args):
+ step_size = __get_step_size(startTimestamp, endTimestamp, density)
+ ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args)
+ ch_sub_query_chart.append("pages.event_type='LOCATION'")
+ meta_condition = __get_meta_constraint(args)
+ ch_sub_query_chart += meta_condition
+
+ ch_sub_query = __get_basic_constraints(table_name="pages", data=args)
+ ch_sub_query.append("pages.event_type='LOCATION'")
+ ch_sub_query += meta_condition
+
+ if value is not None:
+ ch_sub_query.append("pages.url_path = %(value)s")
+ ch_sub_query_chart.append("pages.url_path = %(value)s")
+ ch_sub_query.append("isNotNull(pages.dom_content_loaded_event_time)")
+ ch_sub_query.append("pages.dom_content_loaded_event_time>0")
+ ch_sub_query_chart.append("isNotNull(pages.dom_content_loaded_event_time)")
+ ch_sub_query_chart.append("pages.dom_content_loaded_event_time>0")
+ with ch_client.ClickHouseClient() as ch:
+ ch_query = f"""SELECT COALESCE(avgOrNull(pages.dom_content_loaded_event_time),0) AS value
+ FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
+ WHERE {" AND ".join(ch_sub_query)};"""
+ params = {"step_size": step_size, "project_id": project_id,
+ "startTimestamp": startTimestamp,
+ "endTimestamp": 
endTimestamp, + "value": value, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + results = helper.dict_to_camel_case(rows[0]) + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COALESCE(avgOrNull(pages.dom_content_loaded_event_time),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, params=params) + results["chart"] = helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"value": 0})) + helper.__time_value(results) + return results + + +def get_top_metrics_avg_till_first_bit(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), value=None, density=20, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + ch_sub_query += meta_condition + + if value is not None: + ch_sub_query.append("pages.url_path = %(value)s") + ch_sub_query_chart.append("pages.url_path = %(value)s") + ch_sub_query.append("isNotNull(pages.ttfb)") + ch_sub_query.append("pages.ttfb>0") + ch_sub_query_chart.append("isNotNull(pages.ttfb)") + ch_sub_query_chart.append("pages.ttfb>0") + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT COALESCE(avgOrNull(pages.ttfb),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)};""" + params = {"step_size": 
step_size, "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "value": value, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + results = rows[0] + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COALESCE(avgOrNull(pages.ttfb),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, params=params) + results["chart"] = helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"value": 0})) + helper.__time_value(results) + return helper.dict_to_camel_case(results) + + +def get_top_metrics_avg_time_to_interactive(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), value=None, density=20, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + ch_sub_query += meta_condition + + if value is not None: + ch_sub_query.append("pages.url_path = %(value)s") + ch_sub_query_chart.append("pages.url_path = %(value)s") + ch_sub_query.append("isNotNull(pages.time_to_interactive)") + ch_sub_query.append("pages.time_to_interactive >0") + ch_sub_query_chart.append("isNotNull(pages.time_to_interactive)") + ch_sub_query_chart.append("pages.time_to_interactive >0") + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT COALESCE(avgOrNull(pages.time_to_interactive),0) AS 
value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)};""" + params = {"step_size": step_size, "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "value": value, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + results = rows[0] + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COALESCE(avgOrNull(pages.time_to_interactive),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, params=params) + results["chart"] = helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"value": 0})) + helper.__time_value(results) + return helper.dict_to_camel_case(results) diff --git a/ee/api/chalicelib/core/projects.py b/ee/api/chalicelib/core/projects.py index 6700173b5..9e5600865 100644 --- a/ee/api/chalicelib/core/projects.py +++ b/ee/api/chalicelib/core/projects.py @@ -52,29 +52,57 @@ def get_projects(tenant_id, recording_state=False, gdpr=None, recorded=False, st AND users.tenant_id = %(tenant_id)s AND (roles.all_projects OR roles_projects.project_id = s.project_id) ) AS role_project ON (TRUE)""" - recorded_q = "" + extra_projection = "" + extra_join = "" + if gdpr: + extra_projection += ',s.gdpr' if recorded: - recorded_q = """, COALESCE((SELECT TRUE - FROM public.sessions - WHERE sessions.project_id = s.project_id - AND sessions.start_ts >= (EXTRACT(EPOCH FROM s.created_at) * 1000 - 24 * 60 * 60 * 1000) - AND sessions.start_ts <= %(now)s - LIMIT 1), FALSE) AS recorded""" - query = cur.mogrify(f"""\ - SELECT - s.project_id, s.name, s.project_key, s.save_request_payloads - {',s.gdpr' if gdpr else ''} - {recorded_q} - 
{',stack_integrations.count>0 AS stack_integrations' if stack_integrations else ''} - FROM public.projects AS s - {'LEFT JOIN LATERAL (SELECT COUNT(*) AS count FROM public.integrations WHERE s.project_id = integrations.project_id LIMIT 1) AS stack_integrations ON TRUE' if stack_integrations else ''} - {role_query if user_id is not None else ""} - WHERE s.tenant_id =%(tenant_id)s - AND s.deleted_at IS NULL - ORDER BY s.project_id;""", + extra_projection += """,COALESCE(nullif(EXTRACT(EPOCH FROM s.first_recorded_session_at) * 1000, NULL)::BIGINT , + (SELECT MIN(sessions.start_ts) + FROM public.sessions + WHERE sessions.project_id = s.project_id + AND sessions.start_ts >= (EXTRACT(EPOCH FROM + COALESCE(s.sessions_last_check_at, s.created_at)) * 1000-24*60*60*1000) + AND sessions.start_ts <= %(now)s + LIMIT 1), NULL) AS first_recorded""" + if stack_integrations: + extra_projection += ',stack_integrations.count>0 AS stack_integrations' + + if stack_integrations: + extra_join = """LEFT JOIN LATERAL (SELECT COUNT(*) AS count + FROM public.integrations + WHERE s.project_id = integrations.project_id + LIMIT 1) AS stack_integrations ON TRUE""" + + query = cur.mogrify(f"""{"SELECT *, first_recorded IS NOT NULL AS recorded FROM (" if recorded else ""} + SELECT s.project_id, s.name, s.project_key, s.save_request_payloads, s.first_recorded_session_at + {extra_projection} + FROM public.projects AS s + {extra_join} + {role_query if user_id is not None else ""} + WHERE s.tenant_id =%(tenant_id)s + AND s.deleted_at IS NULL + ORDER BY s.project_id {") AS raw" if recorded else ""};""", {"tenant_id": tenant_id, "user_id": user_id, "now": TimeUTC.now()}) cur.execute(query) rows = cur.fetchall() + + # if recorded is requested, check if it was saved or computed + if recorded: + for r in rows: + if r["first_recorded_session_at"] is None: + extra_update = "" + if r["recorded"]: + extra_update = ", first_recorded_session_at=to_timestamp(%(first_recorded)s/1000)" + query = 
cur.mogrify(f"""UPDATE public.projects + SET sessions_last_check_at=(now() at time zone 'utc') + {extra_update} + WHERE project_id=%(project_id)s""", + {"project_id": r["project_id"], "first_recorded": r["first_recorded"]}) + cur.execute(query) + r.pop("first_recorded_session_at") + r.pop("first_recorded") + if recording_state: project_ids = [f'({r["project_id"]})' for r in rows] query = cur.mogrify(f"""SELECT projects.project_id, COALESCE(MAX(start_ts), 0) AS last diff --git a/ee/api/chalicelib/core/sessions_exp.py b/ee/api/chalicelib/core/sessions_exp.py new file mode 100644 index 000000000..5973a3a8e --- /dev/null +++ b/ee/api/chalicelib/core/sessions_exp.py @@ -0,0 +1,2316 @@ +from typing import List, Union + +import schemas +import schemas_ee +from chalicelib.core import events, metadata, events_ios, \ + sessions_mobs, issues, projects, errors, resources, assist, performance_event, metrics +from chalicelib.utils import pg_client, helper, metrics_helper, ch_client, exp_ch_helper +from chalicelib.utils.TimeUTC import TimeUTC + +SESSION_PROJECTION_COLS = """\ +s.project_id, +s.session_id::text AS session_id, +s.user_uuid, +s.user_id, +s.user_os, +s.user_browser, +s.user_device, +s.user_device_type, +s.user_country, +s.start_ts, +s.duration, +s.events_count, +s.pages_count, +s.errors_count, +s.user_anonymous_id, +s.platform, +s.issue_score, +to_jsonb(s.issue_types) AS issue_types, +favorite_sessions.session_id NOTNULL AS favorite, +COALESCE((SELECT TRUE + FROM public.user_viewed_sessions AS fs + WHERE s.session_id = fs.session_id + AND fs.user_id = %(userId)s LIMIT 1), FALSE) AS viewed + """ + +SESSION_PROJECTION_COLS_CH = """\ +s.project_id, +s.session_id AS session_id, +s.user_uuid AS user_uuid, +s.user_id AS user_id, +s.user_os AS user_os, +s.user_browser AS user_browser, +s.user_device AS user_device, +s.user_device_type AS user_device_type, +s.user_country AS user_country, +toUnixTimestamp(s.datetime)*1000 AS start_ts, +s.duration AS duration, +s.events_count 
AS events_count,
+s.pages_count AS pages_count,
+s.errors_count AS errors_count,
+s.user_anonymous_id AS user_anonymous_id,
+s.platform AS platform,
+0 AS issue_score,
+s.issue_types AS issue_types,
+-- ,
+-- to_jsonb(s.issue_types) AS issue_types,
+isNotNull(favorite_sessions.session_id) AS favorite
+-- COALESCE((SELECT TRUE
+-- FROM public.user_viewed_sessions AS fs
+-- WHERE s.session_id = fs.session_id
+-- AND fs.user_id = %(userId)s
+-- AND fs.project_id = %(project_id)s LIMIT 1), FALSE) AS viewed
+ """
+
+SESSION_PROJECTION_COLS_CH_MAP = """\
+'project_id', toString(%(project_id)s),
+'session_id', toString(s.session_id),
+'user_uuid', toString(s.user_uuid),
+'user_id', toString(s.user_id),
+'user_os', toString(s.user_os),
+'user_browser', toString(s.user_browser),
+'user_device', toString(s.user_device),
+'user_device_type', toString(s.user_device_type),
+'user_country', toString(s.user_country),
+'start_ts', toString(toUnixTimestamp(s.datetime)*1000),
+'duration', toString(s.duration),
+'events_count', toString(s.events_count),
+'pages_count', toString(s.pages_count),
+'errors_count', toString(s.errors_count),
+'user_anonymous_id', toString(s.user_anonymous_id),
+'platform', toString(s.platform),
+'issue_score', '0',
+'favorite', toString(isNotNull(favorite_sessions.session_id))
+"""
+
+
+def __group_metadata(session, project_metadata):
+ meta = {}
+ for m in project_metadata.keys():
+ if project_metadata[m] is not None and session.get(m) is not None:
+ meta[project_metadata[m]] = session[m]
+ session.pop(m)
+ return meta
+
+
+def get_by_id2_pg(project_id, session_id, user_id, full_data=False, include_fav_viewed=False, group_metadata=False,
+ live=True):
+ with pg_client.PostgresClient() as cur:
+ extra_query = []
+ if include_fav_viewed:
+ extra_query.append("""COALESCE((SELECT TRUE
+ FROM public.user_favorite_sessions AS fs
+ WHERE s.session_id = fs.session_id
+ AND fs.user_id = %(userId)s), FALSE) AS favorite""")
+ extra_query.append("""COALESCE((SELECT 
TRUE + FROM public.user_viewed_sessions AS fs + WHERE s.session_id = fs.session_id + AND fs.user_id = %(userId)s), FALSE) AS viewed""") + query = cur.mogrify( + f"""\ + SELECT + s.*, + s.session_id::text AS session_id, + (SELECT project_key FROM public.projects WHERE project_id = %(project_id)s LIMIT 1) AS project_key + {"," if len(extra_query) > 0 else ""}{",".join(extra_query)} + {(",json_build_object(" + ",".join([f"'{m}',p.{m}" for m in metadata._get_column_names()]) + ") AS project_metadata") if group_metadata else ''} + FROM public.sessions AS s {"INNER JOIN public.projects AS p USING (project_id)" if group_metadata else ""} + WHERE s.project_id = %(project_id)s + AND s.session_id = %(session_id)s;""", + {"project_id": project_id, "session_id": session_id, "userId": user_id} + ) + # print("===============") + # print(query) + cur.execute(query=query) + + data = cur.fetchone() + if data is not None: + data = helper.dict_to_camel_case(data) + if full_data: + if data["platform"] == 'ios': + data['events'] = events_ios.get_by_sessionId(project_id=project_id, session_id=session_id) + for e in data['events']: + if e["type"].endswith("_IOS"): + e["type"] = e["type"][:-len("_IOS")] + data['crashes'] = events_ios.get_crashes_by_session_id(session_id=session_id) + data['userEvents'] = events_ios.get_customs_by_sessionId(project_id=project_id, + session_id=session_id) + data['mobsUrl'] = sessions_mobs.get_ios(sessionId=session_id) + else: + data['events'] = events.get_by_sessionId2_pg(project_id=project_id, session_id=session_id, + group_clickrage=True) + all_errors = events.get_errors_by_session_id(session_id=session_id, project_id=project_id) + data['stackEvents'] = [e for e in all_errors if e['source'] != "js_exception"] + # to keep only the first stack + data['errors'] = [errors.format_first_stack_frame(e) for e in all_errors if + e['source'] == "js_exception"][ + :500] # limit the number of errors to reduce the response-body size + data['userEvents'] = 
events.get_customs_by_sessionId2_pg(project_id=project_id, + session_id=session_id) + data['mobsUrl'] = sessions_mobs.get_web(sessionId=session_id) + data['resources'] = resources.get_by_session_id(session_id=session_id, project_id=project_id, + start_ts=data["startTs"], + duration=data["duration"]) + + data['metadata'] = __group_metadata(project_metadata=data.pop("projectMetadata"), session=data) + data['issues'] = issues.get_by_session_id(session_id=session_id, project_id=project_id) + data['live'] = live and assist.is_live(project_id=project_id, + session_id=session_id, + project_key=data["projectKey"]) + data["inDB"] = True + return data + elif live: + return assist.get_live_session_by_id(project_id=project_id, session_id=session_id) + else: + return None + + +def __get_sql_operator(op: schemas.SearchEventOperator): + return { + schemas.SearchEventOperator._is: "=", + schemas.SearchEventOperator._is_any: "IN", + schemas.SearchEventOperator._on: "=", + schemas.SearchEventOperator._on_any: "IN", + schemas.SearchEventOperator._is_not: "!=", + schemas.SearchEventOperator._not_on: "!=", + schemas.SearchEventOperator._contains: "ILIKE", + schemas.SearchEventOperator._not_contains: "NOT ILIKE", + schemas.SearchEventOperator._starts_with: "ILIKE", + schemas.SearchEventOperator._ends_with: "ILIKE", + }.get(op, "=") + + +def __is_negation_operator(op: schemas.SearchEventOperator): + return op in [schemas.SearchEventOperator._is_not, + schemas.SearchEventOperator._not_on, + schemas.SearchEventOperator._not_contains] + + +def __reverse_sql_operator(op): + return "=" if op == "!=" else "!=" if op == "=" else "ILIKE" if op == "NOT ILIKE" else "NOT ILIKE" + + +def __get_sql_operator_multiple(op: schemas.SearchEventOperator): + return " IN " if op not in [schemas.SearchEventOperator._is_not, schemas.SearchEventOperator._not_on, + schemas.SearchEventOperator._not_contains] else " NOT IN " + + +def __get_sql_value_multiple(values): + if isinstance(values, tuple): + return values 
+ return tuple(values) if isinstance(values, list) else (values,) + + +def _multiple_conditions(condition, values, value_key="value", is_not=False): + query = [] + for i in range(len(values)): + k = f"{value_key}_{i}" + query.append(condition.replace(value_key, k)) + return "(" + (" AND " if is_not else " OR ").join(query) + ")" + + +def _multiple_values(values, value_key="value"): + query_values = {} + if values is not None and isinstance(values, list): + for i in range(len(values)): + k = f"{value_key}_{i}" + query_values[k] = values[i] + return query_values + + +def _isAny_opreator(op: schemas.SearchEventOperator): + return op in [schemas.SearchEventOperator._on_any, schemas.SearchEventOperator._is_any] + + +def _isUndefined_operator(op: schemas.SearchEventOperator): + return op in [schemas.SearchEventOperator._is_undefined] + + +# This function executes the query and return result +def search_sessions_pg(data: schemas.SessionsSearchPayloadSchema, project_id, user_id, errors_only=False, + error_status=schemas.ErrorStatus.all, count_only=False, issue=None): + full_args, query_part = search_query_parts(data=data, error_status=error_status, errors_only=errors_only, + favorite_only=data.bookmarked, issue=issue, project_id=project_id, + user_id=user_id) + if data.limit is not None and data.page is not None: + full_args["sessions_limit_s"] = (data.page - 1) * data.limit + full_args["sessions_limit_e"] = data.page * data.limit + else: + full_args["sessions_limit_s"] = 1 + full_args["sessions_limit_e"] = 200 + + meta_keys = [] + with pg_client.PostgresClient() as cur: + if count_only: + main_query = cur.mogrify(f"""SELECT COUNT(DISTINCT s.session_id) AS count_sessions, + COUNT(DISTINCT s.user_uuid) AS count_users + {query_part};""", full_args) + elif data.group_by_user: + g_sort = "count(full_sessions)" + if data.order is None: + data.order = schemas.SortOrderType.desc + else: + data.order = data.order.upper() + if data.sort is not None and data.sort != 'sessionsCount': 
+ sort = helper.key_to_snake_case(data.sort) + g_sort = f"{'MIN' if data.order == schemas.SortOrderType.desc else 'MAX'}({sort})" + else: + sort = 'start_ts' + + meta_keys = metadata.get(project_id=project_id) + main_query = cur.mogrify(f"""SELECT COUNT(*) AS count, + COALESCE(JSONB_AGG(users_sessions) + FILTER (WHERE rn>%(sessions_limit_s)s AND rn<=%(sessions_limit_e)s), '[]'::JSONB) AS sessions + FROM (SELECT user_id, + count(full_sessions) AS user_sessions_count, + jsonb_agg(full_sessions) FILTER (WHERE rn <= 1) AS last_session, + MIN(full_sessions.start_ts) AS first_session_ts, + ROW_NUMBER() OVER (ORDER BY {g_sort} {data.order}) AS rn + FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY {sort} {data.order}) AS rn + FROM (SELECT DISTINCT ON(s.session_id) {SESSION_PROJECTION_COLS} + {"," if len(meta_keys) > 0 else ""}{",".join([f'metadata_{m["index"]}' for m in meta_keys])} + {query_part} + ) AS filtred_sessions + ) AS full_sessions + GROUP BY user_id + ) AS users_sessions;""", + full_args) + else: + if data.order is None: + data.order = schemas.SortOrderType.desc + sort = 'session_id' + if data.sort is not None and data.sort != "session_id": + # sort += " " + data.order + "," + helper.key_to_snake_case(data.sort) + sort = helper.key_to_snake_case(data.sort) + + meta_keys = metadata.get(project_id=project_id) + main_query = cur.mogrify(f"""SELECT COUNT(full_sessions) AS count, + COALESCE(JSONB_AGG(full_sessions) + FILTER (WHERE rn>%(sessions_limit_s)s AND rn<=%(sessions_limit_e)s), '[]'::JSONB) AS sessions + FROM (SELECT *, ROW_NUMBER() OVER (ORDER BY {sort} {data.order}, issue_score DESC) AS rn + FROM (SELECT DISTINCT ON(s.session_id) {SESSION_PROJECTION_COLS} + {"," if len(meta_keys) > 0 else ""}{",".join([f'metadata_{m["index"]}' for m in meta_keys])} + {query_part} + ORDER BY s.session_id desc) AS filtred_sessions + ORDER BY {sort} {data.order}, issue_score DESC) AS full_sessions;""", + full_args) + print("--------------------") + 
print(main_query) + print("--------------------") + try: + cur.execute(main_query) + except Exception as err: + print("--------- SESSIONS SEARCH QUERY EXCEPTION -----------") + print(main_query.decode('UTF-8')) + print("--------- PAYLOAD -----------") + print(data.json()) + print("--------------------") + raise err + if errors_only: + return helper.list_to_camel_case(cur.fetchall()) + + sessions = cur.fetchone() + if count_only: + return helper.dict_to_camel_case(sessions) + + total = sessions["count"] + sessions = sessions["sessions"] + + if data.group_by_user: + for i, s in enumerate(sessions): + sessions[i] = {**s.pop("last_session")[0], **s} + sessions[i].pop("rn") + sessions[i]["metadata"] = {k["key"]: sessions[i][f'metadata_{k["index"]}'] for k in meta_keys \ + if sessions[i][f'metadata_{k["index"]}'] is not None} + else: + for i, s in enumerate(sessions): + sessions[i]["metadata"] = {k["key"]: sessions[i][f'metadata_{k["index"]}'] for k in meta_keys \ + if sessions[i][f'metadata_{k["index"]}'] is not None} + # if not data.group_by_user and data.sort is not None and data.sort != "session_id": + # sessions = sorted(sessions, key=lambda s: s[helper.key_to_snake_case(data.sort)], + # reverse=data.order.upper() == "DESC") + return { + 'total': total, + 'sessions': helper.list_to_camel_case(sessions) + } + + +# This function executes the query and return result +def search_sessions(data: schemas.SessionsSearchPayloadSchema, project_id, user_id, errors_only=False, + error_status=schemas.ErrorStatus.all, count_only=False, issue=None): + print("------ search2_ch") + full_args, query_part = search_query_parts_ch(data=data, error_status=error_status, errors_only=errors_only, + favorite_only=data.bookmarked, issue=issue, project_id=project_id, + user_id=user_id) + if data.sort == "startTs": + data.sort = "datetime" + if data.limit is not None and data.page is not None: + full_args["sessions_limit_s"] = (data.page - 1) * data.limit + full_args["sessions_limit_e"] = 
data.page * data.limit + full_args["sessions_limit"] = data.limit + else: + full_args["sessions_limit_s"] = 1 + full_args["sessions_limit_e"] = 200 + full_args["sessions_limit"] = 200 + + meta_keys = [] + with ch_client.ClickHouseClient() as cur: + if errors_only: + print("--------------------QP") + print(cur.format(query_part, full_args)) + print("--------------------") + main_query = cur.format(f"""SELECT DISTINCT er.error_id, + COALESCE((SELECT TRUE + FROM final.user_viewed_errors AS ve + WHERE er.error_id = ve.error_id + AND ve.user_id = %(userId)s LIMIT 1), FALSE) AS viewed + {query_part};""", full_args) + + elif count_only: + main_query = cur.mogrify(f"""SELECT COUNT(DISTINCT s.session_id) AS count_sessions, + COUNT(DISTINCT s.user_uuid) AS count_users + {query_part};""", full_args) + elif data.group_by_user: + g_sort = "count(full_sessions)" + if data.order is None: + data.order = schemas.SortOrderType.desc + else: + data.order = data.order.upper() + if data.sort is not None and data.sort != 'sessionsCount': + sort = helper.key_to_snake_case(data.sort) + g_sort = f"{'MIN' if data.order == schemas.SortOrderType.desc else 'MAX'}({sort})" + else: + sort = 'start_ts' + + meta_keys = metadata.get(project_id=project_id) + main_query = cur.mogrify(f"""SELECT COUNT(*) AS count, + COALESCE(JSONB_AGG(users_sessions) + FILTER (WHERE rn>%(sessions_limit_s)s AND rn<=%(sessions_limit_e)s), '[]'::JSONB) AS sessions + FROM (SELECT user_id, + count(full_sessions) AS user_sessions_count, + jsonb_agg(full_sessions) FILTER (WHERE rn <= 1) AS last_session, + MIN(full_sessions.start_ts) AS first_session_ts, + ROW_NUMBER() OVER (ORDER BY {g_sort} {data.order}) AS rn + FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY {sort} {data.order}) AS rn + FROM (SELECT DISTINCT ON(s.session_id) {SESSION_PROJECTION_COLS} + {"," if len(meta_keys) > 0 else ""}{",".join([f'metadata_{m["index"]}' for m in meta_keys])} + {query_part} + ) AS filtred_sessions + ) AS full_sessions + 
GROUP BY user_id + ) AS users_sessions;""", + full_args) + else: + if data.order is None: + data.order = schemas.SortOrderType.desc + sort = 'session_id' + if data.sort is not None and data.sort != "session_id": + # sort += " " + data.order + "," + helper.key_to_snake_case(data.sort) + sort = helper.key_to_snake_case(data.sort) + + meta_keys = metadata.get(project_id=project_id) + main_query = cur.format(f"""SELECT any(total) AS count, groupArray(%(sessions_limit)s)(details) AS sessions + FROM (SELECT COUNT() OVER () AS total, + rowNumberInAllBlocks() AS rn, + map({SESSION_PROJECTION_COLS_CH_MAP}) AS details + {query_part} +-- ORDER BY {sort} {data.order} + ) AS raw + WHERE rn>%(sessions_limit_s)s AND rn<=%(sessions_limit_e)s;""", full_args) + print("--------------------") + print(main_query) + print("--------------------") + try: + sessions = cur.execute(main_query) + except Exception as err: + print("--------- SESSIONS SEARCH QUERY EXCEPTION -----------") + print(main_query) + print("--------- PAYLOAD -----------") + print(data.json()) + print("--------------------") + raise err + if errors_only: + return helper.list_to_camel_case(cur.fetchall()) + + if len(sessions) > 0: + sessions = sessions[0] + # if count_only: + # return helper.dict_to_camel_case(sessions) + # for s in sessions: + # print(s) + # s["session_id"] = str(s["session_id"]) + total = sessions["count"] + sessions = sessions["sessions"] + + if data.group_by_user: + for i, s in enumerate(sessions): + sessions[i] = {**s.pop("last_session")[0], **s} + sessions[i].pop("rn") + sessions[i]["metadata"] = {k["key"]: sessions[i][f'metadata_{k["index"]}'] for k in meta_keys \ + if sessions[i][f'metadata_{k["index"]}'] is not None} + else: + for i in range(len(sessions)): + sessions[i]["metadata"] = {k["key"]: sessions[i][f'metadata_{k["index"]}'] for k in meta_keys \ + if sessions[i].get(f'metadata_{k["index"]}') is not None} + sessions[i] = 
schemas_ee.SessionModel.parse_obj(helper.dict_to_camel_case(sessions[i])) + + # if not data.group_by_user and data.sort is not None and data.sort != "session_id": + # sessions = sorted(sessions, key=lambda s: s[helper.key_to_snake_case(data.sort)], + # reverse=data.order.upper() == "DESC") + return { + 'total': total, + 'sessions': sessions + } + + +def search2_series(data: schemas.SessionsSearchPayloadSchema, project_id: int, density: int, + view_type: schemas.MetricTimeseriesViewType, metric_type: schemas.MetricType, + metric_of: schemas.TableMetricOfType, metric_value: List): + step_size = int(metrics_helper.__get_step_size(endTimestamp=data.endDate, startTimestamp=data.startDate, + density=density)) + extra_event = None + if metric_of == schemas.TableMetricOfType.visited_url: + extra_event = f"""SELECT DISTINCT ev.session_id, ev.path + FROM {exp_ch_helper.get_main_events_table(data.startDate)} AS ev + WHERE ev.datetime >= toDateTime(%(startDate)s / 1000) + AND ev.datetime <= toDateTime(%(endDate)s / 1000) + AND ev.project_id = %(project_id)s + AND ev.event_type = 'LOCATION'""" + elif metric_of == schemas.TableMetricOfType.issues and len(metric_value) > 0: + data.filters.append(schemas.SessionSearchFilterSchema(value=metric_value, type=schemas.FilterType.issue, + operator=schemas.SearchEventOperator._is)) + full_args, query_part = search_query_parts_ch(data=data, error_status=None, errors_only=False, + favorite_only=False, issue=None, project_id=project_id, + user_id=None, extra_event=extra_event) + full_args["step_size"] = step_size + sessions = [] + with ch_client.ClickHouseClient() as cur: + if metric_type == schemas.MetricType.timeseries: + if view_type == schemas.MetricTimeseriesViewType.line_chart: + query = f"""SELECT toUnixTimestamp( + toStartOfInterval(processed_sessions.datetime, INTERVAL %(step_size)s second) + ) * 1000 AS timestamp, + COUNT(processed_sessions.session_id) AS count + FROM (SELECT DISTINCT ON(s.session_id) s.session_id AS session_id, + 
s.datetime AS datetime + {query_part}) AS processed_sessions + GROUP BY timestamp + ORDER BY timestamp;""" + main_query = cur.format(query, full_args) + else: + main_query = cur.format(f"""SELECT count(DISTINCT s.session_id) AS count + {query_part};""", full_args) + + # print("--------------------") + # print(main_query) + # print("--------------------") + sessions = cur.execute(main_query) + if view_type == schemas.MetricTimeseriesViewType.line_chart: + sessions = metrics.__complete_missing_steps(start_time=data.startDate, end_time=data.endDate, + density=density, neutral={"count": 0}, rows=sessions) + else: + sessions = sessions[0]["count"] if len(sessions) > 0 else 0 + elif metric_type == schemas.MetricType.table: + full_args["limit_s"] = 0 + full_args["limit_e"] = 200 + if isinstance(metric_of, schemas.TableMetricOfType): + main_col = "user_id" + extra_col = "s.user_id" + extra_where = "" + pre_query = "" + if metric_of == schemas.TableMetricOfType.user_country: + main_col = "user_country" + extra_col = "s.user_country" + elif metric_of == schemas.TableMetricOfType.user_device: + main_col = "user_device" + extra_col = "s.user_device" + elif metric_of == schemas.TableMetricOfType.user_browser: + main_col = "user_browser" + extra_col = "s.user_browser" + elif metric_of == schemas.TableMetricOfType.issues: + main_col = "issue" + extra_col = f"arrayJoin(s.issue_types) AS {main_col}" + if len(metric_value) > 0: + extra_where = [] + for i in range(len(metric_value)): + arg_name = f"selected_issue_{i}" + extra_where.append(f"{main_col} = %({arg_name})s") + full_args[arg_name] = metric_value[i] + extra_where = f"WHERE ({' OR '.join(extra_where)})" + elif metric_of == schemas.TableMetricOfType.visited_url: + main_col = "path" + extra_col = "s.path" + main_query = cur.format(f"""{pre_query} + SELECT COUNT(DISTINCT {main_col}) OVER () AS main_count, + {main_col} AS name, + count(DISTINCT session_id) AS session_count + FROM (SELECT s.session_id AS session_id, + {extra_col} 
+ {query_part} + ORDER BY s.session_id desc) AS filtred_sessions + {extra_where} + GROUP BY {main_col} + ORDER BY session_count DESC + LIMIT %(limit_e)s OFFSET %(limit_s)s;""", + full_args) + print("--------------------") + print(main_query) + print("--------------------") + sessions = cur.execute(main_query) + # cur.fetchone() + count = 0 + if len(sessions) > 0: + count = sessions[0]["main_count"] + for s in sessions: + s.pop("main_count") + sessions = {"count": count, "values": helper.list_to_camel_case(sessions)} + + return sessions + + +def __is_valid_event(is_any: bool, event: schemas._SessionSearchEventSchema): + return not (not is_any and len(event.value) == 0 and event.type not in [schemas.EventType.request_details, + schemas.EventType.graphql] \ + or event.type in [schemas.PerformanceEventType.location_dom_complete, + schemas.PerformanceEventType.location_largest_contentful_paint_time, + schemas.PerformanceEventType.location_ttfb, + schemas.PerformanceEventType.location_avg_cpu_load, + schemas.PerformanceEventType.location_avg_memory_usage + ] and (event.source is None or len(event.source) == 0) \ + or event.type in [schemas.EventType.request_details, schemas.EventType.graphql] and ( + event.filters is None or len(event.filters) == 0)) + + +# this function generates the query and return the generated-query with the dict of query arguments +def search_query_parts(data, error_status, errors_only, favorite_only, issue, project_id, user_id, extra_event=None): + ss_constraints = [] + full_args = {"project_id": project_id, "startDate": data.startDate, "endDate": data.endDate, + "projectId": project_id, "userId": user_id} + extra_constraints = [ + "s.project_id = %(project_id)s", + "s.duration IS NOT NULL" + ] + extra_from = "" + events_query_part = "" + if len(data.filters) > 0: + meta_keys = None + for i, f in enumerate(data.filters): + if not isinstance(f.value, list): + f.value = [f.value] + filter_type = f.type + f.value = 
helper.values_for_operator(value=f.value, op=f.operator) + f_k = f"f_value{i}" + full_args = {**full_args, **_multiple_values(f.value, value_key=f_k)} + op = __get_sql_operator(f.operator) \ + if filter_type not in [schemas.FilterType.events_count] else f.operator + is_any = _isAny_opreator(f.operator) + is_undefined = _isUndefined_operator(f.operator) + if not is_any and not is_undefined and len(f.value) == 0: + continue + is_not = False + if __is_negation_operator(f.operator): + is_not = True + if filter_type == schemas.FilterType.user_browser: + if is_any: + extra_constraints.append('s.user_browser IS NOT NULL') + ss_constraints.append('ms.user_browser IS NOT NULL') + else: + extra_constraints.append( + _multiple_conditions(f's.user_browser {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.user_browser {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in [schemas.FilterType.user_os, schemas.FilterType.user_os_ios]: + if is_any: + extra_constraints.append('s.user_os IS NOT NULL') + ss_constraints.append('ms.user_os IS NOT NULL') + else: + extra_constraints.append( + _multiple_conditions(f's.user_os {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.user_os {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in [schemas.FilterType.user_device, schemas.FilterType.user_device_ios]: + if is_any: + extra_constraints.append('s.user_device IS NOT NULL') + ss_constraints.append('ms.user_device IS NOT NULL') + else: + extra_constraints.append( + _multiple_conditions(f's.user_device {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.user_device {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in [schemas.FilterType.user_country, schemas.FilterType.user_country_ios]: + if is_any: + 
extra_constraints.append('s.user_country IS NOT NULL') + ss_constraints.append('ms.user_country IS NOT NULL') + else: + extra_constraints.append( + _multiple_conditions(f's.user_country {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.user_country {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in [schemas.FilterType.utm_source]: + if is_any: + extra_constraints.append('s.utm_source IS NOT NULL') + ss_constraints.append('ms.utm_source IS NOT NULL') + elif is_undefined: + extra_constraints.append('s.utm_source IS NULL') + ss_constraints.append('ms.utm_source IS NULL') + else: + extra_constraints.append( + _multiple_conditions(f's.utm_source {op} %({f_k})s::text', f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.utm_source {op} %({f_k})s::text', f.value, is_not=is_not, + value_key=f_k)) + elif filter_type in [schemas.FilterType.utm_medium]: + if is_any: + extra_constraints.append('s.utm_medium IS NOT NULL') + ss_constraints.append('ms.utm_medium IS NOT NULL') + elif is_undefined: + extra_constraints.append('s.utm_medium IS NULL') + ss_constraints.append('ms.utm_medium IS NULL') + else: + extra_constraints.append( + _multiple_conditions(f's.utm_medium {op} %({f_k})s::text', f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.utm_medium {op} %({f_k})s::text', f.value, is_not=is_not, + value_key=f_k)) + elif filter_type in [schemas.FilterType.utm_campaign]: + if is_any: + extra_constraints.append('s.utm_campaign IS NOT NULL') + ss_constraints.append('ms.utm_campaign IS NOT NULL') + elif is_undefined: + extra_constraints.append('s.utm_campaign IS NULL') + ss_constraints.append('ms.utm_campaign IS NULL') + else: + extra_constraints.append( + _multiple_conditions(f's.utm_campaign {op} %({f_k})s::text', f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + 
_multiple_conditions(f'ms.utm_campaign {op} %({f_k})s::text', f.value, is_not=is_not, + value_key=f_k)) + + elif filter_type == schemas.FilterType.duration: + if len(f.value) > 0 and f.value[0] is not None: + extra_constraints.append("s.duration >= %(minDuration)s") + ss_constraints.append("ms.duration >= %(minDuration)s") + full_args["minDuration"] = f.value[0] + if len(f.value) > 1 and f.value[1] is not None and int(f.value[1]) > 0: + extra_constraints.append("s.duration <= %(maxDuration)s") + ss_constraints.append("ms.duration <= %(maxDuration)s") + full_args["maxDuration"] = f.value[1] + elif filter_type == schemas.FilterType.referrer: + extra_from += f"INNER JOIN {events.event_type.LOCATION.table} AS p USING(session_id)" + if is_any: + extra_constraints.append('p.base_referrer IS NOT NULL') + else: + extra_constraints.append( + _multiple_conditions(f"p.base_referrer {op} %({f_k})s", f.value, is_not=is_not, value_key=f_k)) + elif filter_type == events.event_type.METADATA.ui_type: + # get metadata list only if you need it + if meta_keys is None: + meta_keys = metadata.get(project_id=project_id) + meta_keys = {m["key"]: m["index"] for m in meta_keys} + if f.source in meta_keys.keys(): + if is_any: + extra_constraints.append(f"s.{metadata.index_to_colname(meta_keys[f.source])} IS NOT NULL") + ss_constraints.append(f"ms.{metadata.index_to_colname(meta_keys[f.source])} IS NOT NULL") + elif is_undefined: + extra_constraints.append(f"s.{metadata.index_to_colname(meta_keys[f.source])} IS NULL") + ss_constraints.append(f"ms.{metadata.index_to_colname(meta_keys[f.source])} IS NULL") + else: + extra_constraints.append( + _multiple_conditions( + f"s.{metadata.index_to_colname(meta_keys[f.source])} {op} %({f_k})s::text", + f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions( + f"ms.{metadata.index_to_colname(meta_keys[f.source])} {op} %({f_k})s::text", + f.value, is_not=is_not, value_key=f_k)) + elif filter_type in 
[schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: + if is_any: + extra_constraints.append('s.user_id IS NOT NULL') + ss_constraints.append('ms.user_id IS NOT NULL') + elif is_undefined: + extra_constraints.append('s.user_id IS NULL') + ss_constraints.append('ms.user_id IS NULL') + else: + extra_constraints.append( + _multiple_conditions(f"s.user_id {op} %({f_k})s::text", f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f"ms.user_id {op} %({f_k})s::text", f.value, is_not=is_not, value_key=f_k)) + elif filter_type in [schemas.FilterType.user_anonymous_id, + schemas.FilterType.user_anonymous_id_ios]: + if is_any: + extra_constraints.append('s.user_anonymous_id IS NOT NULL') + ss_constraints.append('ms.user_anonymous_id IS NOT NULL') + elif is_undefined: + extra_constraints.append('s.user_anonymous_id IS NULL') + ss_constraints.append('ms.user_anonymous_id IS NULL') + else: + extra_constraints.append( + _multiple_conditions(f"s.user_anonymous_id {op} %({f_k})s::text", f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f"ms.user_anonymous_id {op} %({f_k})s::text", f.value, is_not=is_not, + value_key=f_k)) + elif filter_type in [schemas.FilterType.rev_id, schemas.FilterType.rev_id_ios]: + if is_any: + extra_constraints.append('s.rev_id IS NOT NULL') + ss_constraints.append('ms.rev_id IS NOT NULL') + elif is_undefined: + extra_constraints.append('s.rev_id IS NULL') + ss_constraints.append('ms.rev_id IS NULL') + else: + extra_constraints.append( + _multiple_conditions(f"s.rev_id {op} %({f_k})s::text", f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f"ms.rev_id {op} %({f_k})s::text", f.value, is_not=is_not, value_key=f_k)) + elif filter_type == schemas.FilterType.platform: + # op = __get_sql_operator(f.operator) + extra_constraints.append( + _multiple_conditions(f"s.user_device_type {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + 
ss_constraints.append( + _multiple_conditions(f"ms.user_device_type {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + elif filter_type == schemas.FilterType.issue: + if is_any: + extra_constraints.append("array_length(s.issue_types, 1) > 0") + ss_constraints.append("array_length(ms.issue_types, 1) > 0") + else: + extra_constraints.append( + _multiple_conditions(f"%({f_k})s {op} ANY (s.issue_types)", f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f"%({f_k})s {op} ANY (ms.issue_types)", f.value, is_not=is_not, + value_key=f_k)) + elif filter_type == schemas.FilterType.events_count: + extra_constraints.append( + _multiple_conditions(f"s.events_count {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f"ms.events_count {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + # --------------------------------------------------------------------------- + if len(data.events) > 0: + valid_events_count = 0 + for event in data.events: + is_any = _isAny_opreator(event.operator) + if not isinstance(event.value, list): + event.value = [event.value] + if __is_valid_event(is_any=is_any, event=event): + valid_events_count += 1 + events_query_from = [] + event_index = 0 + or_events = data.events_order == schemas.SearchEventOrder._or + # events_joiner = " FULL JOIN " if or_events else " INNER JOIN LATERAL " + events_joiner = " UNION " if or_events else " INNER JOIN LATERAL " + for i, event in enumerate(data.events): + event_type = event.type + is_any = _isAny_opreator(event.operator) + if not isinstance(event.value, list): + event.value = [event.value] + if not __is_valid_event(is_any=is_any, event=event): + continue + op = __get_sql_operator(event.operator) + is_not = False + if __is_negation_operator(event.operator): + is_not = True + op = __reverse_sql_operator(op) + if event_index == 0 or or_events: + event_from = "%s INNER JOIN public.sessions AS ms USING 
(session_id)" + event_where = ["ms.project_id = %(projectId)s", "main.timestamp >= %(startDate)s", + "main.timestamp <= %(endDate)s", "ms.start_ts >= %(startDate)s", + "ms.start_ts <= %(endDate)s", "ms.duration IS NOT NULL"] + if favorite_only and not errors_only: + event_from += "INNER JOIN public.user_favorite_sessions AS fs USING(session_id)" + event_where.append("fs.user_id = %(userId)s") + else: + event_from = "%s" + event_where = ["main.timestamp >= %(startDate)s", "main.timestamp <= %(endDate)s", + "main.session_id=event_0.session_id"] + if data.events_order == schemas.SearchEventOrder._then: + event_where.append(f"event_{event_index - 1}.timestamp <= main.timestamp") + e_k = f"e_value{i}" + s_k = e_k + "_source" + if event.type != schemas.PerformanceEventType.time_between_events: + event.value = helper.values_for_operator(value=event.value, op=event.operator) + full_args = {**full_args, + **_multiple_values(event.value, value_key=e_k), + **_multiple_values(event.source, value_key=s_k)} + + if event_type == events.event_type.CLICK.ui_type: + event_from = event_from % f"{events.event_type.CLICK.table} AS main " + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.CLICK.column} {op} %({e_k})s", event.value, + value_key=e_k)) + + elif event_type == events.event_type.INPUT.ui_type: + event_from = event_from % f"{events.event_type.INPUT.table} AS main " + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.INPUT.column} {op} %({e_k})s", event.value, + value_key=e_k)) + if event.source is not None and len(event.source) > 0: + event_where.append(_multiple_conditions(f"main.value ILIKE %(custom{i})s", event.source, + value_key=f"custom{i}")) + full_args = {**full_args, **_multiple_values(event.source, value_key=f"custom{i}")} + + elif event_type == events.event_type.LOCATION.ui_type: + event_from = event_from % f"{events.event_type.LOCATION.table} AS main " + if not is_any: + event_where.append( + 
_multiple_conditions(f"main.{events.event_type.LOCATION.column} {op} %({e_k})s", + event.value, value_key=e_k)) + elif event_type == events.event_type.CUSTOM.ui_type: + event_from = event_from % f"{events.event_type.CUSTOM.table} AS main " + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.CUSTOM.column} {op} %({e_k})s", event.value, + value_key=e_k)) + elif event_type == events.event_type.REQUEST.ui_type: + event_from = event_from % f"{events.event_type.REQUEST.table} AS main " + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.REQUEST.column} {op} %({e_k})s", event.value, + value_key=e_k)) + elif event_type == events.event_type.GRAPHQL.ui_type: + event_from = event_from % f"{events.event_type.GRAPHQL.table} AS main " + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.GRAPHQL.column} {op} %({e_k})s", event.value, + value_key=e_k)) + elif event_type == events.event_type.STATEACTION.ui_type: + event_from = event_from % f"{events.event_type.STATEACTION.table} AS main " + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.STATEACTION.column} {op} %({e_k})s", + event.value, value_key=e_k)) + elif event_type == events.event_type.ERROR.ui_type: + event_from = event_from % f"{events.event_type.ERROR.table} AS main INNER JOIN public.errors AS main1 USING(error_id)" + event.source = tuple(event.source) + if not is_any and event.value not in [None, "*", ""]: + event_where.append( + _multiple_conditions(f"(main1.message {op} %({e_k})s OR main1.name {op} %({e_k})s)", + event.value, value_key=e_k)) + if len(event.source) > 0 and event.source[0] not in [None, "*", ""]: + event_where.append(_multiple_conditions(f"main1.source = %({s_k})s", event.source, value_key=s_k)) + + + # ----- IOS + elif event_type == events.event_type.CLICK_IOS.ui_type: + event_from = event_from % f"{events.event_type.CLICK_IOS.table} AS main " + if not is_any: + 
event_where.append( + _multiple_conditions(f"main.{events.event_type.CLICK_IOS.column} {op} %({e_k})s", + event.value, value_key=e_k)) + + elif event_type == events.event_type.INPUT_IOS.ui_type: + event_from = event_from % f"{events.event_type.INPUT_IOS.table} AS main " + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.INPUT_IOS.column} {op} %({e_k})s", + event.value, value_key=e_k)) + if event.source is not None and len(event.source) > 0: + event_where.append(_multiple_conditions(f"main.value ILIKE %(custom{i})s", event.source, + value_key="custom{i}")) + full_args = {**full_args, **_multiple_values(event.source, f"custom{i}")} + elif event_type == events.event_type.VIEW_IOS.ui_type: + event_from = event_from % f"{events.event_type.VIEW_IOS.table} AS main " + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.VIEW_IOS.column} {op} %({e_k})s", + event.value, value_key=e_k)) + elif event_type == events.event_type.CUSTOM_IOS.ui_type: + event_from = event_from % f"{events.event_type.CUSTOM_IOS.table} AS main " + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.CUSTOM_IOS.column} {op} %({e_k})s", + event.value, value_key=e_k)) + elif event_type == events.event_type.REQUEST_IOS.ui_type: + event_from = event_from % f"{events.event_type.REQUEST_IOS.table} AS main " + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.REQUEST_IOS.column} {op} %({e_k})s", + event.value, value_key=e_k)) + elif event_type == events.event_type.ERROR_IOS.ui_type: + event_from = event_from % f"{events.event_type.ERROR_IOS.table} AS main INNER JOIN public.crashes_ios AS main1 USING(crash_id)" + if not is_any and event.value not in [None, "*", ""]: + event_where.append( + _multiple_conditions(f"(main1.reason {op} %({e_k})s OR main1.name {op} %({e_k})s)", + event.value, value_key=e_k)) + elif event_type == schemas.PerformanceEventType.fetch_failed: + 
event_from = event_from % f"{events.event_type.REQUEST.table} AS main " + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.REQUEST.column} {op} %({e_k})s", + event.value, value_key=e_k)) + col = performance_event.get_col(event_type) + colname = col["column"] + event_where.append(f"main.{colname} = FALSE") + # elif event_type == schemas.PerformanceEventType.fetch_duration: + # event_from = event_from % f"{events.event_type.REQUEST.table} AS main " + # if not is_any: + # event_where.append( + # _multiple_conditions(f"main.{events.event_type.REQUEST.column} {op} %({e_k})s", + # event.value, value_key=e_k)) + # col = performance_event.get_col(event_type) + # colname = col["column"] + # tname = "main" + # e_k += "_custom" + # full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)} + # event_where.append(f"{tname}.{colname} IS NOT NULL AND {tname}.{colname}>0 AND " + + # _multiple_conditions(f"{tname}.{colname} {event.sourceOperator} %({e_k})s", + # event.source, value_key=e_k)) + elif event_type in [schemas.PerformanceEventType.location_dom_complete, + schemas.PerformanceEventType.location_largest_contentful_paint_time, + schemas.PerformanceEventType.location_ttfb, + schemas.PerformanceEventType.location_avg_cpu_load, + schemas.PerformanceEventType.location_avg_memory_usage + ]: + event_from = event_from % f"{events.event_type.LOCATION.table} AS main " + col = performance_event.get_col(event_type) + colname = col["column"] + tname = "main" + if col.get("extraJoin") is not None: + tname = "ej" + event_from += f" INNER JOIN {col['extraJoin']} AS {tname} USING(session_id)" + event_where += [f"{tname}.timestamp >= main.timestamp", f"{tname}.timestamp >= %(startDate)s", + f"{tname}.timestamp <= %(endDate)s"] + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.LOCATION.column} {op} %({e_k})s", + event.value, value_key=e_k)) + e_k += "_custom" + full_args = {**full_args, 
**_multiple_values(event.source, value_key=e_k)} + + event_where.append(f"{tname}.{colname} IS NOT NULL AND {tname}.{colname}>0 AND " + + _multiple_conditions(f"{tname}.{colname} {event.sourceOperator} %({e_k})s", + event.source, value_key=e_k)) + elif event_type == schemas.PerformanceEventType.time_between_events: + event_from = event_from % f"{getattr(events.event_type, event.value[0].type).table} AS main INNER JOIN {getattr(events.event_type, event.value[1].type).table} AS main2 USING(session_id) " + if not isinstance(event.value[0].value, list): + event.value[0].value = [event.value[0].value] + if not isinstance(event.value[1].value, list): + event.value[1].value = [event.value[1].value] + event.value[0].value = helper.values_for_operator(value=event.value[0].value, + op=event.value[0].operator) + event.value[1].value = helper.values_for_operator(value=event.value[1].value, + op=event.value[0].operator) + e_k1 = e_k + "_e1" + e_k2 = e_k + "_e2" + full_args = {**full_args, + **_multiple_values(event.value[0].value, value_key=e_k1), + **_multiple_values(event.value[1].value, value_key=e_k2)} + s_op = __get_sql_operator(event.value[0].operator) + event_where += ["main2.timestamp >= %(startDate)s", "main2.timestamp <= %(endDate)s"] + if event_index > 0 and not or_events: + event_where.append("main2.session_id=event_0.session_id") + is_any = _isAny_opreator(event.value[0].operator) + if not is_any: + event_where.append( + _multiple_conditions( + f"main.{getattr(events.event_type, event.value[0].type).column} {s_op} %({e_k1})s", + event.value[0].value, value_key=e_k1)) + s_op = __get_sql_operator(event.value[1].operator) + is_any = _isAny_opreator(event.value[1].operator) + if not is_any: + event_where.append( + _multiple_conditions( + f"main2.{getattr(events.event_type, event.value[1].type).column} {s_op} %({e_k2})s", + event.value[1].value, value_key=e_k2)) + + e_k += "_custom" + full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)} + 
event_where.append( + _multiple_conditions(f"main2.timestamp - main.timestamp {event.sourceOperator} %({e_k})s", + event.source, value_key=e_k)) + + elif event_type == schemas.EventType.request_details: + event_from = event_from % f"{events.event_type.REQUEST.table} AS main " + apply = False + for j, f in enumerate(event.filters): + is_any = _isAny_opreator(f.operator) + if is_any or len(f.value) == 0: + continue + f.value = helper.values_for_operator(value=f.value, op=f.operator) + op = __get_sql_operator(f.operator) + e_k_f = e_k + f"_fetch{j}" + full_args = {**full_args, **_multiple_values(f.value, value_key=e_k_f)} + if f.type == schemas.FetchFilterType._url: + event_where.append( + _multiple_conditions(f"main.{events.event_type.REQUEST.column} {op} %({e_k_f})s::text", + f.value, value_key=e_k_f)) + apply = True + elif f.type == schemas.FetchFilterType._status_code: + event_where.append( + _multiple_conditions(f"main.status_code {f.operator} %({e_k_f})s::integer", f.value, + value_key=e_k_f)) + apply = True + elif f.type == schemas.FetchFilterType._method: + event_where.append( + _multiple_conditions(f"main.method {op} %({e_k_f})s", f.value, value_key=e_k_f)) + apply = True + elif f.type == schemas.FetchFilterType._duration: + event_where.append( + _multiple_conditions(f"main.duration {f.operator} %({e_k_f})s::integer", f.value, + value_key=e_k_f)) + apply = True + elif f.type == schemas.FetchFilterType._request_body: + event_where.append( + _multiple_conditions(f"main.request_body {op} %({e_k_f})s::text", f.value, value_key=e_k_f)) + apply = True + elif f.type == schemas.FetchFilterType._response_body: + event_where.append( + _multiple_conditions(f"main.response_body {op} %({e_k_f})s::text", f.value, + value_key=e_k_f)) + apply = True + else: + print(f"undefined FETCH filter: {f.type}") + if not apply: + continue + elif event_type == schemas.EventType.graphql: + event_from = event_from % f"{events.event_type.GRAPHQL.table} AS main " + for j, f in 
enumerate(event.filters): + is_any = _isAny_opreator(f.operator) + if is_any or len(f.value) == 0: + continue + f.value = helper.values_for_operator(value=f.value, op=f.operator) + op = __get_sql_operator(f.operator) + e_k_f = e_k + f"_graphql{j}" + full_args = {**full_args, **_multiple_values(f.value, value_key=e_k_f)} + if f.type == schemas.GraphqlFilterType._name: + event_where.append( + _multiple_conditions(f"main.{events.event_type.GRAPHQL.column} {op} %({e_k_f})s", f.value, + value_key=e_k_f)) + elif f.type == schemas.GraphqlFilterType._method: + event_where.append( + _multiple_conditions(f"main.method {op} %({e_k_f})s", f.value, value_key=e_k_f)) + elif f.type == schemas.GraphqlFilterType._request_body: + event_where.append( + _multiple_conditions(f"main.request_body {op} %({e_k_f})s", f.value, value_key=e_k_f)) + elif f.type == schemas.GraphqlFilterType._response_body: + event_where.append( + _multiple_conditions(f"main.response_body {op} %({e_k_f})s", f.value, value_key=e_k_f)) + else: + print(f"undefined GRAPHQL filter: {f.type}") + else: + continue + if event_index == 0 or or_events: + event_where += ss_constraints + if is_not: + if event_index == 0 or or_events: + events_query_from.append(f"""\ + (SELECT + session_id, + 0 AS timestamp + FROM sessions + WHERE EXISTS(SELECT session_id + FROM {event_from} + WHERE {" AND ".join(event_where)} + AND sessions.session_id=ms.session_id) IS FALSE + AND project_id = %(projectId)s + AND start_ts >= %(startDate)s + AND start_ts <= %(endDate)s + AND duration IS NOT NULL + ) {"" if or_events else (f"AS event_{event_index}" + ("ON(TRUE)" if event_index > 0 else ""))}\ + """) + else: + events_query_from.append(f"""\ + (SELECT + event_0.session_id, + event_{event_index - 1}.timestamp AS timestamp + WHERE EXISTS(SELECT session_id FROM {event_from} WHERE {" AND ".join(event_where)}) IS FALSE + ) AS event_{event_index} {"ON(TRUE)" if event_index > 0 else ""}\ + """) + else: + events_query_from.append(f"""\ + (SELECT 
main.session_id, {"MIN" if event_index < (valid_events_count - 1) else "MAX"}(main.timestamp) AS timestamp + FROM {event_from} + WHERE {" AND ".join(event_where)} + GROUP BY 1 + ) {"" if or_events else (f"AS event_{event_index} " + ("ON(TRUE)" if event_index > 0 else ""))}\ + """) + event_index += 1 + if event_index > 0: + if or_events: + events_query_part = f"""SELECT + session_id, + MIN(timestamp) AS first_event_ts, + MAX(timestamp) AS last_event_ts + FROM ({events_joiner.join(events_query_from)}) AS u + GROUP BY 1""" + else: + events_query_part = f"""SELECT + event_0.session_id, + MIN(event_0.timestamp) AS first_event_ts, + MAX(event_{event_index - 1}.timestamp) AS last_event_ts + FROM {events_joiner.join(events_query_from)} + GROUP BY 1""" + else: + data.events = [] + # --------------------------------------------------------------------------- + if data.startDate is not None: + extra_constraints.append("s.start_ts >= %(startDate)s") + if data.endDate is not None: + extra_constraints.append("s.start_ts <= %(endDate)s") + # if data.platform is not None: + # if data.platform == schemas.PlatformType.mobile: + # extra_constraints.append(b"s.user_os in ('Android','BlackBerry OS','iOS','Tizen','Windows Phone')") + # elif data.platform == schemas.PlatformType.desktop: + # extra_constraints.append( + # b"s.user_os in ('Chrome OS','Fedora','Firefox OS','Linux','Mac OS X','Ubuntu','Windows')") + + if errors_only: + extra_from += f" INNER JOIN {events.event_type.ERROR.table} AS er USING (session_id) INNER JOIN public.errors AS ser USING (error_id)" + extra_constraints.append("ser.source = 'js_exception'") + extra_constraints.append("ser.project_id = %(project_id)s") + if error_status != schemas.ErrorStatus.all: + extra_constraints.append("ser.status = %(error_status)s") + full_args["error_status"] = error_status + if favorite_only: + extra_from += " INNER JOIN public.user_favorite_errors AS ufe USING (error_id)" + extra_constraints.append("ufe.user_id = %(userId)s") + # 
extra_constraints = [extra.decode('UTF-8') + "\n" for extra in extra_constraints] + if favorite_only and not errors_only and user_id is not None: + extra_from += """INNER JOIN (SELECT user_id, session_id + FROM public.user_favorite_sessions + WHERE user_id = %(userId)s) AS favorite_sessions + USING (session_id)""" + elif not favorite_only and not errors_only and user_id is not None: + extra_from += """LEFT JOIN (SELECT user_id, session_id + FROM public.user_favorite_sessions + WHERE user_id = %(userId)s) AS favorite_sessions + USING (session_id)""" + extra_join = "" + if issue is not None: + extra_join = """ + INNER JOIN LATERAL(SELECT TRUE FROM events_common.issues INNER JOIN public.issues AS p_issues USING (issue_id) + WHERE issues.session_id=f.session_id + AND p_issues.type=%(issue_type)s + AND p_issues.context_string=%(issue_contextString)s + AND timestamp >= f.first_event_ts + AND timestamp <= f.last_event_ts) AS issues ON(TRUE) + """ + full_args["issue_contextString"] = issue["contextString"] + full_args["issue_type"] = issue["type"] + if extra_event: + extra_join += f"""INNER JOIN {extra_event} AS ev USING(session_id)""" + extra_constraints.append("ev.timestamp>=%(startDate)s") + extra_constraints.append("ev.timestamp<=%(endDate)s") + query_part = f"""\ + FROM {f"({events_query_part}) AS f" if len(events_query_part) > 0 else "public.sessions AS s"} + {extra_join} + {"INNER JOIN public.sessions AS s USING(session_id)" if len(events_query_part) > 0 else ""} + {extra_from} + WHERE + {" AND ".join(extra_constraints)}""" + return full_args, query_part + + +def __get_event_type(event_type: Union[schemas.EventType, schemas.PerformanceEventType]): + defs = { + schemas.EventType.click: "CLICK", + schemas.EventType.input: "INPUT", + schemas.EventType.location: "LOCATION", + schemas.PerformanceEventType.location_dom_complete: "LOCATION", + schemas.PerformanceEventType.location_largest_contentful_paint_time: "LOCATION", + schemas.PerformanceEventType.location_ttfb: 
"LOCATION", + schemas.EventType.custom: "CUSTOM", + schemas.EventType.request: "REQUEST", + schemas.EventType.request_details: "REQUEST", + schemas.PerformanceEventType.fetch_failed: "REQUEST", + schemas.EventType.state_action: "STATEACTION", + schemas.EventType.error: "ERROR", + schemas.PerformanceEventType.location_avg_cpu_load: 'PERFORMANCE', + schemas.PerformanceEventType.location_avg_memory_usage: 'PERFORMANCE', + } + + if event_type not in defs: + raise Exception(f"unsupported event_type:{event_type}") + return defs.get(event_type) + + +# this function generates the query and return the generated-query with the dict of query arguments +def search_query_parts_ch(data, error_status, errors_only, favorite_only, issue, project_id, user_id, extra_event=None): + ss_constraints = [] + full_args = {"project_id": project_id, "startDate": data.startDate, "endDate": data.endDate, + "projectId": project_id, "userId": user_id} + + MAIN_EVENTS_TABLE = exp_ch_helper.get_main_events_table(data.startDate) + MAIN_SESSIONS_TABLE = exp_ch_helper.get_main_sessions_table(data.startDate) + + full_args["MAIN_EVENTS_TABLE"] = MAIN_EVENTS_TABLE + full_args["MAIN_SESSIONS_TABLE"] = MAIN_SESSIONS_TABLE + extra_constraints = [ + "s.project_id = %(project_id)s", + "isNotNull(s.duration)" + ] + if favorite_only: + extra_constraints.append("""s.session_id IN (SELECT session_id + FROM final.user_favorite_sessions + WHERE user_id = %(userId)s)""") + extra_from = "" + events_query_part = "" + __events_where_basic = ["project_id = %(projectId)s", + "datetime >= toDateTime(%(startDate)s/1000)", + "datetime <= toDateTime(%(endDate)s/1000)"] + events_conditions_where = ["main.project_id = %(projectId)s", + "main.datetime >= toDateTime(%(startDate)s/1000)", + "main.datetime <= toDateTime(%(endDate)s/1000)"] + if len(data.filters) > 0: + meta_keys = None + # to reduce include a sub-query of sessions inside events query, in order to reduce the selected data + include_in_events = False + for i, f in 
enumerate(data.filters): + if not isinstance(f.value, list): + f.value = [f.value] + filter_type = f.type + f.value = helper.values_for_operator(value=f.value, op=f.operator) + f_k = f"f_value{i}" + full_args = {**full_args, f_k: f.value, **_multiple_values(f.value, value_key=f_k)} + op = __get_sql_operator(f.operator) \ + if filter_type not in [schemas.FilterType.events_count] else f.operator + is_any = _isAny_opreator(f.operator) + is_undefined = _isUndefined_operator(f.operator) + if not is_any and not is_undefined and len(f.value) == 0: + continue + is_not = False + if __is_negation_operator(f.operator): + is_not = True + if filter_type == schemas.FilterType.user_browser: + if is_any: + extra_constraints.append('isNotNull(s.user_browser)') + ss_constraints.append('isNotNull(ms.user_browser)') + else: + extra_constraints.append( + _multiple_conditions(f's.user_browser {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.user_browser {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in [schemas.FilterType.user_os, schemas.FilterType.user_os_ios]: + if is_any: + extra_constraints.append('isNotNull(s.user_os)') + ss_constraints.append('isNotNull(ms.user_os)') + else: + extra_constraints.append( + _multiple_conditions(f's.user_os {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.user_os {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in [schemas.FilterType.user_device, schemas.FilterType.user_device_ios]: + if is_any: + extra_constraints.append('isNotNull(s.user_device)') + ss_constraints.append('isNotNull(ms.user_device)') + else: + extra_constraints.append( + _multiple_conditions(f's.user_device {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.user_device {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type 
in [schemas.FilterType.user_country, schemas.FilterType.user_country_ios]: + if is_any: + extra_constraints.append('isNotNull(s.user_country)') + ss_constraints.append('isNotNull(ms.user_country)') + else: + extra_constraints.append( + _multiple_conditions(f's.user_country {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.user_country {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif filter_type in [schemas.FilterType.utm_source]: + if is_any: + extra_constraints.append('isNotNull(s.utm_source)') + ss_constraints.append('isNotNull(ms.utm_source)') + elif is_undefined: + extra_constraints.append('isNull(s.utm_source)') + ss_constraints.append('isNull(ms.utm_source)') + else: + extra_constraints.append( + _multiple_conditions(f's.utm_source {op} toString(%({f_k})s)', f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.utm_source {op} toString(%({f_k})s)', f.value, is_not=is_not, + value_key=f_k)) + elif filter_type in [schemas.FilterType.utm_medium]: + if is_any: + extra_constraints.append('isNotNull(s.utm_medium)') + ss_constraints.append('isNotNull(ms.utm_medium)') + elif is_undefined: + extra_constraints.append('isNull(s.utm_medium)') + ss_constraints.append('isNull(ms.utm_medium') + else: + extra_constraints.append( + _multiple_conditions(f's.utm_medium {op} toString(%({f_k})s)', f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.utm_medium {op} toString(%({f_k})s)', f.value, is_not=is_not, + value_key=f_k)) + elif filter_type in [schemas.FilterType.utm_campaign]: + if is_any: + extra_constraints.append('isNotNull(s.utm_campaign)') + ss_constraints.append('isNotNull(ms.utm_campaign)') + elif is_undefined: + extra_constraints.append('isNull(s.utm_campaign)') + ss_constraints.append('isNull(ms.utm_campaign)') + else: + extra_constraints.append( + _multiple_conditions(f's.utm_campaign {op} 
toString(%({f_k})s)', f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f'ms.utm_campaign {op} toString(%({f_k})s)', f.value, is_not=is_not, + value_key=f_k)) + + elif filter_type == schemas.FilterType.duration: + if len(f.value) > 0 and f.value[0] is not None: + extra_constraints.append("s.duration >= %(minDuration)s") + ss_constraints.append("ms.duration >= %(minDuration)s") + full_args["minDuration"] = f.value[0] + if len(f.value) > 1 and f.value[1] is not None and int(f.value[1]) > 0: + extra_constraints.append("s.duration <= %(maxDuration)s") + ss_constraints.append("ms.duration <= %(maxDuration)s") + full_args["maxDuration"] = f.value[1] + elif filter_type == schemas.FilterType.referrer: + # extra_from += f"INNER JOIN {events.event_type.LOCATION.table} AS p USING(session_id)" + if is_any: + referrer_constraint = 'isNotNull(r.base_referrer)' + else: + referrer_constraint = _multiple_conditions(f"r.base_referrer {op} %({f_k})s", f.value, + is_not=is_not, value_key=f_k) + referrer_constraint = f"""(SELECT DISTINCT session_id + FROM {MAIN_EVENTS_TABLE} AS r + WHERE {" AND ".join([f"r.{b}" for b in __events_where_basic])} + AND event_type='{__get_event_type(schemas.EventType.location)}' + AND {referrer_constraint})""" + # events_conditions_where.append(f"""main.session_id IN {referrer_constraint}""") + # extra_constraints.append(f"""s.session_id IN {referrer_constraint}""") + extra_from += f"\nINNER JOIN {referrer_constraint} AS referred ON(referred.session_id=s.session_id)" + elif filter_type == events.event_type.METADATA.ui_type: + # get metadata list only if you need it + if meta_keys is None: + meta_keys = metadata.get(project_id=project_id) + meta_keys = {m["key"]: m["index"] for m in meta_keys} + if f.source in meta_keys.keys(): + if is_any: + extra_constraints.append(f"isNotNull(s.{metadata.index_to_colname(meta_keys[f.source])})") + 
ss_constraints.append(f"isNotNull(ms.{metadata.index_to_colname(meta_keys[f.source])})") + elif is_undefined: + extra_constraints.append(f"isNull(s.{metadata.index_to_colname(meta_keys[f.source])})") + ss_constraints.append(f"isNull(ms.{metadata.index_to_colname(meta_keys[f.source])})") + else: + extra_constraints.append( + _multiple_conditions( + f"s.{metadata.index_to_colname(meta_keys[f.source])} {op} toString(%({f_k})s)", + f.value, is_not=is_not, value_key=f_k)) + ss_constraints.append( + _multiple_conditions( + f"ms.{metadata.index_to_colname(meta_keys[f.source])} {op} toString(%({f_k})s)", + f.value, is_not=is_not, value_key=f_k)) + elif filter_type in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: + if is_any: + extra_constraints.append('isNotNull(s.user_id)') + ss_constraints.append('isNotNull(ms.user_id)') + elif is_undefined: + extra_constraints.append('isNull(s.user_id)') + ss_constraints.append('isNull(ms.user_id)') + else: + extra_constraints.append( + _multiple_conditions(f"s.user_id {op} toString(%({f_k})s)", f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f"ms.user_id {op} toString(%({f_k})s)", f.value, is_not=is_not, + value_key=f_k)) + elif filter_type in [schemas.FilterType.user_anonymous_id, + schemas.FilterType.user_anonymous_id_ios]: + if is_any: + extra_constraints.append('isNotNull(s.user_anonymous_id)') + ss_constraints.append('isNotNull(ms.user_anonymous_id)') + elif is_undefined: + extra_constraints.append('isNull(s.user_anonymous_id)') + ss_constraints.append('isNull(ms.user_anonymous_id)') + else: + extra_constraints.append( + _multiple_conditions(f"s.user_anonymous_id {op} toString(%({f_k})s)", f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f"ms.user_anonymous_id {op} toString(%({f_k})s)", f.value, is_not=is_not, + value_key=f_k)) + elif filter_type in [schemas.FilterType.rev_id, schemas.FilterType.rev_id_ios]: + if is_any: + 
extra_constraints.append('isNotNull(s.rev_id)') + ss_constraints.append('isNotNull(ms.rev_id)') + elif is_undefined: + extra_constraints.append('isNull(s.rev_id)') + ss_constraints.append('isNull(ms.rev_id)') + else: + extra_constraints.append( + _multiple_conditions(f"s.rev_id {op} toString(%({f_k})s)", f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f"ms.rev_id {op} toString(%({f_k})s)", f.value, is_not=is_not, + value_key=f_k)) + elif filter_type == schemas.FilterType.platform: + # op = __get_sql_operator(f.operator) + extra_constraints.append( + _multiple_conditions(f"s.user_device_type {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f"ms.user_device_type {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + elif filter_type == schemas.FilterType.issue: + if is_any: + extra_constraints.append("notEmpty(s.issue_types)") + ss_constraints.append("notEmpty(ms.issue_types)") + else: + extra_constraints.append(f"hasAny(s.issue_types,%({f_k})s)") + # _multiple_conditions(f"%({f_k})s {op} ANY (s.issue_types)", f.value, is_not=is_not, + # value_key=f_k)) + ss_constraints.append(f"hasAny(ms.issue_types,%({f_k})s)") + # _multiple_conditions(f"%({f_k})s {op} ANY (ms.issue_types)", f.value, is_not=is_not, + # value_key=f_k)) + if is_not: + extra_constraints[-1] = f"not({extra_constraints[-1]})" + ss_constraints[-1] = f"not({ss_constraints[-1]})" + elif filter_type == schemas.FilterType.events_count: + extra_constraints.append( + _multiple_conditions(f"s.events_count {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + ss_constraints.append( + _multiple_conditions(f"ms.events_count {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + else: + continue + include_in_events = True + + if include_in_events: + events_conditions_where.append(f"""main.session_id IN (SELECT s.session_id + FROM {MAIN_SESSIONS_TABLE} AS s + WHERE {" AND 
".join(extra_constraints)})""") + # --------------------------------------------------------------------------- + events_extra_join = "" + if len(data.events) > 0: + valid_events_count = 0 + for event in data.events: + is_any = _isAny_opreator(event.operator) + if not isinstance(event.value, list): + event.value = [event.value] + if __is_valid_event(is_any=is_any, event=event): + valid_events_count += 1 + events_query_from = [] + events_conditions = [] + events_conditions_not = [] + event_index = 0 + or_events = data.events_order == schemas.SearchEventOrder._or + # events_joiner = " UNION " if or_events else " INNER JOIN LATERAL " + for i, event in enumerate(data.events): + event_type = event.type + is_any = _isAny_opreator(event.operator) + if not isinstance(event.value, list): + event.value = [event.value] + if not __is_valid_event(is_any=is_any, event=event): + continue + op = __get_sql_operator(event.operator) + is_not = False + if __is_negation_operator(event.operator): + is_not = True + op = __reverse_sql_operator(op) + # if event_index == 0 or or_events: + # event_from = f"%s INNER JOIN {MAIN_SESSIONS_TABLE} AS ms USING (session_id)" + event_from = "%s" + event_where = ["main.project_id = %(projectId)s", + "main.datetime >= toDateTime(%(startDate)s/1000)", + "main.datetime <= toDateTime(%(endDate)s/1000)"] + if favorite_only and not errors_only: + event_from += "INNER JOIN final.user_favorite_sessions AS fs USING(session_id)" + event_where.append("fs.user_id = %(userId)s") + # else: + # event_from = "%s" + # event_where = ["main.datetime >= toDateTime(%(startDate)s/1000)", + # "main.datetime <= toDateTime(%(endDate)s/1000)", + # "main.session_id=event_0.session_id"] + # if data.events_order == schemas.SearchEventOrder._then: + # event_where.append(f"event_{event_index - 1}.datetime <= main.datetime") + e_k = f"e_value{i}" + s_k = e_k + "_source" + if event.type != schemas.PerformanceEventType.time_between_events: + event.value = 
helper.values_for_operator(value=event.value, op=event.operator) + full_args = {**full_args, + **_multiple_values(event.value, value_key=e_k), + **_multiple_values(event.source, value_key=s_k)} + + if event_type == events.event_type.CLICK.ui_type: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " + _column = events.event_type.CLICK.column + event_where.append(f"main.event_type='{__get_event_type(event_type)}'") + events_conditions.append({"type": event_where[-1]}) + if not is_any: + if is_not: + event_where.append(_multiple_conditions(f"sub.{_column} {op} %({e_k})s", event.value, + value_key=e_k)) + events_conditions_not.append({"type": f"sub.event_type='{__get_event_type(event_type)}'"}) + events_conditions_not[-1]["condition"] = event_where[-1] + else: + event_where.append(_multiple_conditions(f"main.{_column} {op} %({e_k})s", event.value, + value_key=e_k)) + events_conditions[-1]["condition"] = event_where[-1] + + elif event_type == events.event_type.INPUT.ui_type: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " + _column = events.event_type.INPUT.column + event_where.append(f"main.event_type='{__get_event_type(event_type)}'") + events_conditions.append({"type": event_where[-1]}) + if not is_any: + if is_not: + event_where.append(_multiple_conditions(f"sub.{_column} {op} %({e_k})s", event.value, + value_key=e_k)) + events_conditions_not.append({"type": f"sub.event_type='{__get_event_type(event_type)}'"}) + events_conditions_not[-1]["condition"] = event_where[-1] + else: + event_where.append(_multiple_conditions(f"main.{_column} {op} %({e_k})s", event.value, + value_key=e_k)) + events_conditions[-1]["condition"] = event_where[-1] + if event.source is not None and len(event.source) > 0: + event_where.append(_multiple_conditions(f"main.value ILIKE %(custom{i})s", event.source, + value_key=f"custom{i}")) + full_args = {**full_args, **_multiple_values(event.source, value_key=f"custom{i}")} + + elif event_type == 
events.event_type.LOCATION.ui_type: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " + _column = events.event_type.LOCATION.column + event_where.append(f"main.event_type='{__get_event_type(event_type)}'") + events_conditions.append({"type": event_where[-1]}) + if not is_any: + if is_not: + event_where.append(_multiple_conditions(f"sub.{_column} {op} %({e_k})s", event.value, + value_key=e_k)) + events_conditions_not.append({"type": f"sub.event_type='{__get_event_type(event_type)}'"}) + events_conditions_not[-1]["condition"] = event_where[-1] + else: + event_where.append(_multiple_conditions(f"main.{_column} {op} %({e_k})s", + event.value, value_key=e_k)) + events_conditions[-1]["condition"] = event_where[-1] + elif event_type == events.event_type.CUSTOM.ui_type: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " + _column = events.event_type.CUSTOM.column + event_where.append(f"main.event_type='{__get_event_type(event_type)}'") + events_conditions.append({"type": event_where[-1]}) + if not is_any: + if is_not: + event_where.append(_multiple_conditions(f"sub.{_column} {op} %({e_k})s", event.value, + value_key=e_k)) + events_conditions_not.append({"type": f"sub.event_type='{__get_event_type(event_type)}'"}) + events_conditions_not[-1]["condition"] = event_where[-1] + else: + event_where.append(_multiple_conditions(f"main.{_column} {op} %({e_k})s", event.value, + value_key=e_k)) + events_conditions[-1]["condition"] = event_where[-1] + elif event_type == events.event_type.REQUEST.ui_type: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " + _column = events.event_type.REQUEST.column + event_where.append(f"main.event_type='{__get_event_type(event_type)}'") + events_conditions.append({"type": event_where[-1]}) + if not is_any: + if is_not: + event_where.append(_multiple_conditions(f"sub.{_column} {op} %({e_k})s", event.value, + value_key=e_k)) + events_conditions_not.append({"type": f"sub.event_type='{__get_event_type(event_type)}'"}) + 
events_conditions_not[-1]["condition"] = event_where[-1] + else: + event_where.append(_multiple_conditions(f"main.{_column} {op} %({e_k})s", event.value, + value_key=e_k)) + events_conditions[-1]["condition"] = event_where[-1] + # elif event_type == events.event_type.GRAPHQL.ui_type: + # event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main" + # event_where.append(f"main.event_type='GRAPHQL'") + # events_conditions.append({"type": event_where[-1]}) + # if not is_any: + # event_where.append( + # _multiple_conditions(f"main.{events.event_type.GRAPHQL.column} {op} %({e_k})s", event.value, + # value_key=e_k)) + # events_conditions[-1]["condition"] = event_where[-1] + elif event_type == events.event_type.STATEACTION.ui_type: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " + _column = events.event_type.STATEACTION.column + event_where.append(f"main.event_type='{__get_event_type(event_type)}'") + events_conditions.append({"type": event_where[-1]}) + if not is_any: + if is_not: + event_where.append(_multiple_conditions(f"sub.{_column} {op} %({e_k})s", event.value, + value_key=e_k)) + events_conditions_not.append({"type": f"sub.event_type='{__get_event_type(event_type)}'"}) + events_conditions_not[-1]["condition"] = event_where[-1] + else: + event_where.append(_multiple_conditions(f"main.{_column} {op} %({e_k})s", + event.value, value_key=e_k)) + events_conditions[-1]["condition"] = event_where[-1] + # TODO: isNot for ERROR + elif event_type == events.event_type.ERROR.ui_type: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main" + events_extra_join = "SELECT * FROM final.errors AS main1 WHERE main1.project_id=%(project_id)s" + event_where.append(f"main.event_type='{__get_event_type(event_type)}'") + events_conditions.append({"type": event_where[-1]}) + event.source = tuple(event.source) + events_conditions[-1]["condition"] = [] + if not is_any and event.value not in [None, "*", ""]: + event_where.append( + _multiple_conditions(f"(main1.message {op} 
%({e_k})s OR main1.name {op} %({e_k})s)", + event.value, value_key=e_k)) + events_conditions[-1]["condition"].append(event_where[-1]) + events_extra_join += f" AND {event_where[-1]}" + if len(event.source) > 0 and event.source[0] not in [None, "*", ""]: + event_where.append(_multiple_conditions(f"main1.source = %({s_k})s", event.source, value_key=s_k)) + events_conditions[-1]["condition"].append(event_where[-1]) + events_extra_join += f" AND {event_where[-1]}" + + events_conditions[-1]["condition"] = " AND ".join(events_conditions[-1]["condition"]) + + elif event_type == schemas.PerformanceEventType.fetch_failed: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " + _column = events.event_type.REQUEST.column + event_where.append(f"main.event_type='{__get_event_type(event_type)}'") + events_conditions.append({"type": event_where[-1]}) + events_conditions[-1]["condition"] = [] + if not is_any: + if is_not: + event_where.append(_multiple_conditions(f"sub.{_column} {op} %({e_k})s", event.value, + value_key=e_k)) + events_conditions_not.append({"type": f"sub.event_type='{__get_event_type(event_type)}'"}) + events_conditions_not[-1]["condition"] = event_where[-1] + else: + event_where.append(_multiple_conditions(f"main.{_column} {op} %({e_k})s", + event.value, value_key=e_k)) + events_conditions[-1]["condition"].append(event_where[-1]) + col = performance_event.get_col(event_type) + colname = col["column"] + event_where.append(f"main.{colname} = 0") + events_conditions[-1]["condition"].append(event_where[-1]) + events_conditions[-1]["condition"] = " AND ".join(events_conditions[-1]["condition"]) + + # elif event_type == schemas.PerformanceEventType.fetch_duration: + # event_from = event_from % f"{events.event_type.REQUEST.table} AS main " + # if not is_any: + # event_where.append( + # _multiple_conditions(f"main.{events.event_type.REQUEST.column} {op} %({e_k})s", + # event.value, value_key=e_k)) + # col = performance_event.get_col(event_type) + # colname = 
col["column"] + # tname = "main" + # e_k += "_custom" + # full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)} + # event_where.append(f"{tname}.{colname} IS NOT NULL AND {tname}.{colname}>0 AND " + + # _multiple_conditions(f"{tname}.{colname} {event.sourceOperator} %({e_k})s", + # event.source, value_key=e_k)) + # TODO: isNot for PerformanceEvent + elif event_type in [schemas.PerformanceEventType.location_dom_complete, + schemas.PerformanceEventType.location_largest_contentful_paint_time, + schemas.PerformanceEventType.location_ttfb]: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " + event_where.append(f"main.event_type='{__get_event_type(event_type)}'") + events_conditions.append({"type": event_where[-1]}) + events_conditions[-1]["condition"] = [] + col = performance_event.get_col(event_type) + colname = col["column"] + tname = "main" + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.LOCATION.column} {op} %({e_k})s", + event.value, value_key=e_k)) + events_conditions[-1]["condition"].append(event_where[-1]) + e_k += "_custom" + full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)} + + event_where.append(f"isNotNull({tname}.{colname}) AND {tname}.{colname}>0 AND " + + _multiple_conditions(f"{tname}.{colname} {event.sourceOperator} %({e_k})s", + event.source, value_key=e_k)) + events_conditions[-1]["condition"].append(event_where[-1]) + events_conditions[-1]["condition"] = " AND ".join(events_conditions[-1]["condition"]) + # TODO: isNot for PerformanceEvent + elif event_type in [schemas.PerformanceEventType.location_avg_cpu_load, + schemas.PerformanceEventType.location_avg_memory_usage]: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " + event_where.append(f"main.event_type='{__get_event_type(event_type)}'") + events_conditions.append({"type": event_where[-1]}) + events_conditions[-1]["condition"] = [] + col = performance_event.get_col(event_type) + colname = 
col["column"] + tname = "main" + if not is_any: + event_where.append( + _multiple_conditions(f"main.{events.event_type.LOCATION.column} {op} %({e_k})s", + event.value, value_key=e_k)) + events_conditions[-1]["condition"].append(event_where[-1]) + e_k += "_custom" + full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)} + + event_where.append(f"isNotNull({tname}.{colname}) AND {tname}.{colname}>0 AND " + + _multiple_conditions(f"{tname}.{colname} {event.sourceOperator} %({e_k})s", + event.source, value_key=e_k)) + events_conditions[-1]["condition"].append(event_where[-1]) + events_conditions[-1]["condition"] = " AND ".join(events_conditions[-1]["condition"]) + # TODO: no isNot for TimeBetweenEvents + elif event_type == schemas.PerformanceEventType.time_between_events: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " + # event_from = event_from % f"{getattr(events.event_type, event.value[0].type).table} AS main INNER JOIN {getattr(events.event_type, event.value[1].type).table} AS main2 USING(session_id) " + event_where.append(f"main.event_type='{__get_event_type(event.value[0].type)}'") + events_conditions.append({"type": event_where[-1]}) + event_where.append(f"main.event_type='{__get_event_type(event.value[0].type)}'") + events_conditions.append({"type": event_where[-1]}) + + if not isinstance(event.value[0].value, list): + event.value[0].value = [event.value[0].value] + if not isinstance(event.value[1].value, list): + event.value[1].value = [event.value[1].value] + event.value[0].value = helper.values_for_operator(value=event.value[0].value, + op=event.value[0].operator) + event.value[1].value = helper.values_for_operator(value=event.value[1].value, + op=event.value[0].operator) + e_k1 = e_k + "_e1" + e_k2 = e_k + "_e2" + full_args = {**full_args, + **_multiple_values(event.value[0].value, value_key=e_k1), + **_multiple_values(event.value[1].value, value_key=e_k2)} + s_op = __get_sql_operator(event.value[0].operator) + # event_where 
+= ["main2.timestamp >= %(startDate)s", "main2.timestamp <= %(endDate)s"] + # if event_index > 0 and not or_events: + # event_where.append("main2.session_id=event_0.session_id") + is_any = _isAny_opreator(event.value[0].operator) + if not is_any: + event_where.append( + _multiple_conditions( + f"main.{getattr(events.event_type, event.value[0].type).column} {s_op} %({e_k1})s", + event.value[0].value, value_key=e_k1)) + events_conditions[-2]["condition"] = event_where[-1] + s_op = __get_sql_operator(event.value[1].operator) + is_any = _isAny_opreator(event.value[1].operator) + if not is_any: + event_where.append( + _multiple_conditions( + f"main.{getattr(events.event_type, event.value[1].type).column} {s_op} %({e_k2})s", + event.value[1].value, value_key=e_k2)) + events_conditions[-1]["condition"] = event_where[-1] + + e_k += "_custom" + full_args = {**full_args, **_multiple_values(event.source, value_key=e_k)} + # event_where.append( + # _multiple_conditions(f"main2.timestamp - main.timestamp {event.sourceOperator} %({e_k})s", + # event.source, value_key=e_k)) + # events_conditions[-2]["time"] = f"(?t{event.sourceOperator} %({e_k})s)" + events_conditions[-2]["time"] = _multiple_conditions(f"?t{event.sourceOperator}%({e_k})s", event.source, + value_key=e_k) + event_index += 1 + # TODO: no isNot for RequestDetails + elif event_type == schemas.EventType.request_details: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " + event_where.append(f"main.event_type='{__get_event_type(event_type)}'") + events_conditions.append({"type": event_where[-1]}) + apply = False + events_conditions[-1]["condition"] = [] + for j, f in enumerate(event.filters): + is_any = _isAny_opreator(f.operator) + if is_any or len(f.value) == 0: + continue + f.value = helper.values_for_operator(value=f.value, op=f.operator) + op = __get_sql_operator(f.operator) + e_k_f = e_k + f"_fetch{j}" + full_args = {**full_args, **_multiple_values(f.value, value_key=e_k_f)} + if f.type == 
schemas.FetchFilterType._url: + event_where.append( + _multiple_conditions(f"main.{events.event_type.REQUEST.column} {op} %({e_k_f})s", f.value, + value_key=e_k_f)) + events_conditions[-1]["condition"].append(event_where[-1]) + apply = True + elif f.type == schemas.FetchFilterType._status_code: + event_where.append( + _multiple_conditions(f"main.status {f.operator} %({e_k_f})s", f.value, + value_key=e_k_f)) + events_conditions[-1]["condition"].append(event_where[-1]) + apply = True + elif f.type == schemas.FetchFilterType._method: + event_where.append( + _multiple_conditions(f"main.method {op} %({e_k_f})s", f.value, value_key=e_k_f)) + events_conditions[-1]["condition"].append(event_where[-1]) + apply = True + elif f.type == schemas.FetchFilterType._duration: + event_where.append( + _multiple_conditions(f"main.duration {f.operator} %({e_k_f})s", f.value, value_key=e_k_f)) + events_conditions[-1]["condition"].append(event_where[-1]) + apply = True + elif f.type == schemas.FetchFilterType._request_body: + event_where.append( + _multiple_conditions(f"main.request_body {op} %({e_k_f})s", f.value, value_key=e_k_f)) + events_conditions[-1]["condition"].append(event_where[-1]) + apply = True + elif f.type == schemas.FetchFilterType._response_body: + event_where.append( + _multiple_conditions(f"main.response_body {op} %({e_k_f})s", f.value, value_key=e_k_f)) + events_conditions[-1]["condition"].append(event_where[-1]) + apply = True + else: + print(f"undefined FETCH filter: {f.type}") + if not apply: + continue + else: + events_conditions[-1]["condition"] = " AND ".join(events_conditions[-1]["condition"]) + # TODO: no isNot for GraphQL + elif event_type == schemas.EventType.graphql: + event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " + event_where.append(f"main.event_type='GRAPHQL'") + events_conditions.append({"type": event_where[-1]}) + events_conditions[-1]["condition"] = [] + for j, f in enumerate(event.filters): + is_any = _isAny_opreator(f.operator) + if 
is_any or len(f.value) == 0: + continue + f.value = helper.values_for_operator(value=f.value, op=f.operator) + op = __get_sql_operator(f.operator) + e_k_f = e_k + f"_graphql{j}" + full_args = {**full_args, **_multiple_values(f.value, value_key=e_k_f)} + if f.type == schemas.GraphqlFilterType._name: + event_where.append( + _multiple_conditions(f"main.{events.event_type.GRAPHQL.column} {op} %({e_k_f})s", f.value, + value_key=e_k_f)) + events_conditions[-1]["condition"].append(event_where[-1]) + elif f.type == schemas.GraphqlFilterType._method: + event_where.append( + _multiple_conditions(f"main.method {op} %({e_k_f})s", f.value, value_key=e_k_f)) + events_conditions[-1]["condition"].append(event_where[-1]) + elif f.type == schemas.GraphqlFilterType._request_body: + event_where.append( + _multiple_conditions(f"main.request_body {op} %({e_k_f})s", f.value, value_key=e_k_f)) + events_conditions[-1]["condition"].append(event_where[-1]) + elif f.type == schemas.GraphqlFilterType._response_body: + event_where.append( + _multiple_conditions(f"main.response_body {op} %({e_k_f})s", f.value, value_key=e_k_f)) + events_conditions[-1]["condition"].append(event_where[-1]) + else: + print(f"undefined GRAPHQL filter: {f.type}") + events_conditions[-1]["condition"] = " AND ".join(events_conditions[-1]["condition"]) + else: + continue + if event_index == 0 or or_events: + event_where += ss_constraints + if is_not: + if event_index == 0 or or_events: + events_query_from.append(f"""\ + (SELECT + session_id, + 0 AS timestamp + FROM sessions + WHERE EXISTS(SELECT session_id + FROM {event_from} + WHERE {" AND ".join(event_where)} + AND sessions.session_id=ms.session_id) IS FALSE + AND project_id = %(projectId)s + AND start_ts >= %(startDate)s + AND start_ts <= %(endDate)s + AND duration IS NOT NULL + ) {"" if or_events else (f"AS event_{event_index}" + ("ON(TRUE)" if event_index > 0 else ""))}\ + """) + else: + events_query_from.append(f"""\ + (SELECT + event_0.session_id, + 
event_{event_index - 1}.timestamp AS timestamp + WHERE EXISTS(SELECT session_id FROM {event_from} WHERE {" AND ".join(event_where)}) IS FALSE + ) AS event_{event_index} {"ON(TRUE)" if event_index > 0 else ""}\ + """) + else: + if data.events_order == schemas.SearchEventOrder._then: + pass + else: + events_query_from.append(f"""\ + (SELECT main.session_id, {"MIN" if event_index < (valid_events_count - 1) else "MAX"}(main.datetime) AS datetime + FROM {event_from} + WHERE {" AND ".join(event_where)} + GROUP BY session_id + ) {"" if or_events else (f"AS event_{event_index} " + ("ON(TRUE)" if event_index > 0 else ""))}\ + """) + event_index += 1 + + if event_index < 2: + data.events_order = schemas.SearchEventOrder._or + if len(events_extra_join) > 0: + if event_index < 2: + events_extra_join = f"INNER JOIN ({events_extra_join}) AS main1 USING(error_id)" + else: + events_extra_join = f"LEFT JOIN ({events_extra_join}) AS main1 USING(error_id)" + if favorite_only and user_id is not None: + events_conditions_where.append("""main.session_id IN (SELECT session_id + FROM final.user_favorite_sessions + WHERE user_id = %(userId)s)""") + + if data.events_order in [schemas.SearchEventOrder._then, schemas.SearchEventOrder._and]: + sequence_pattern = [f'(?{i + 1}){c.get("time", "")}' for i, c in enumerate(events_conditions)] + sub_join = "" + type_conditions = [] + value_conditions = [] + _value_conditions = [] + sequence_conditions = [] + for c in events_conditions: + if c['type'] not in type_conditions: + type_conditions.append(c['type']) + + if c.get('condition') \ + and c['condition'] not in value_conditions \ + and c['condition'] % full_args not in _value_conditions: + value_conditions.append(c['condition']) + _value_conditions.append(c['condition'] % full_args) + + sequence_conditions.append(c['type']) + if c.get('condition'): + sequence_conditions[-1] += " AND " + c["condition"] + + del _value_conditions + events_conditions_where.append(f"({' OR '.join([c for c in 
type_conditions])})") + del type_conditions + if len(value_conditions) > 0: + events_conditions_where.append(f"({' OR '.join([c for c in value_conditions])})") + del value_conditions + if len(events_conditions_not) > 0: + _value_conditions_not = [] + value_conditions_not = [] + for c in events_conditions_not: + p = f"{c['type']} AND {c['condition']}" + _p = p % full_args + if _p not in _value_conditions_not: + _value_conditions_not.append(_p) + value_conditions_not.append(p) + value_conditions_not = [f"sub.{c}" for c in __events_where_basic] + value_conditions_not + sub_join = f"""LEFT ANTI JOIN ( SELECT DISTINCT sub.session_id + FROM {MAIN_EVENTS_TABLE} AS sub + WHERE {' AND '.join([c for c in value_conditions_not])}) AS sub USING(session_id)""" + del _value_conditions_not + del value_conditions_not + + if data.events_order == schemas.SearchEventOrder._then: + having = f"""HAVING sequenceMatch('{''.join(sequence_pattern)}')(main.datetime,{','.join(sequence_conditions)})""" + else: + having = f"""HAVING {" AND ".join([f"countIf({c})>0" for c in list(set(sequence_conditions))])}""" + + events_query_part = f"""SELECT main.session_id, + MIN(main.datetime) AS first_event_ts, + MAX(main.datetime) AS last_event_ts + FROM {MAIN_EVENTS_TABLE} AS main {events_extra_join} + {sub_join} + WHERE {" AND ".join(events_conditions_where)} + GROUP BY session_id + {having}""" + else: + print(">>>>> OR EVENTS") + type_conditions = [] + sequence_conditions = [] + has_values = False + for c in events_conditions: + if c['type'] not in type_conditions: + type_conditions.append(c['type']) + + sequence_conditions.append(c['type']) + if c.get('condition'): + has_values = True + sequence_conditions[-1] += " AND " + c["condition"] + + events_conditions_where.append(f"({' OR '.join([c for c in type_conditions])})") + + if len(events_conditions_not) > 0: + has_values = True + _value_conditions_not = [] + value_conditions_not = [] + for c in events_conditions_not: + p = f"{c['type']} AND 
not({c['condition']})".replace("sub.", "main.") + _p = p % full_args + if _p not in _value_conditions_not: + _value_conditions_not.append(_p) + value_conditions_not.append(p) + del _value_conditions_not + sequence_conditions += value_conditions_not + + if has_values: + events_conditions = [c for c in list(set(sequence_conditions))] + events_conditions_where.append(f"({' OR '.join(events_conditions)})") + events_query_part = f"""SELECT main.session_id, + MIN(main.datetime) AS first_event_ts, + MAX(main.datetime) AS last_event_ts + FROM {MAIN_EVENTS_TABLE} AS main {events_extra_join} + WHERE {" AND ".join(events_conditions_where)} + GROUP BY session_id""" + else: + data.events = [] + # --------------------------------------------------------------------------- + if data.startDate is not None: + extra_constraints.append("s.datetime >= toDateTime(%(startDate)s/1000)") + if data.endDate is not None: + extra_constraints.append("s.datetime <= toDateTime(%(endDate)s/1000)") + # if data.platform is not None: + # if data.platform == schemas.PlatformType.mobile: + # extra_constraints.append(b"s.user_os in ('Android','BlackBerry OS','iOS','Tizen','Windows Phone')") + # elif data.platform == schemas.PlatformType.desktop: + # extra_constraints.append( + # b"s.user_os in ('Chrome OS','Fedora','Firefox OS','Linux','Mac OS X','Ubuntu','Windows')") + + # if errors_only: + # extra_from += f" INNER JOIN {events.event_type.ERROR.table} AS er USING (session_id) INNER JOIN public.errors AS ser USING (error_id)" + # extra_constraints.append("ser.source = 'js_exception'") + # extra_constraints.append("ser.project_id = %(project_id)s") + # if error_status != schemas.ErrorStatus.all: + # extra_constraints.append("ser.status = %(error_status)s") + # full_args["error_status"] = error_status + # if favorite_only: + # extra_from += " INNER JOIN final.user_favorite_errors AS ufe USING (error_id)" + # extra_constraints.append("ufe.user_id = %(userId)s") + + if favorite_only and not errors_only and 
user_id is not None: + extra_from += """INNER JOIN (SELECT 1 AS session_id) AS favorite_sessions + ON (TRUE)""" + elif not favorite_only and not errors_only and user_id is not None: + extra_from += """LEFT JOIN (SELECT session_id + FROM final.user_favorite_sessions + WHERE user_id = %(userId)s) AS favorite_sessions + ON (s.session_id=favorite_sessions.session_id)""" + extra_join = "" + if issue is not None: + extra_join = """ + INNER JOIN LATERAL(SELECT TRUE FROM events_common.issues INNER JOIN public.issues AS p_issues USING (issue_id) + WHERE issues.session_id=f.session_id + AND p_issues.type=%(issue_type)s + AND p_issues.context_string=%(issue_contextString)s + AND timestamp >= f.first_event_ts + AND timestamp <= f.last_event_ts) AS issues ON(TRUE) + """ + full_args["issue_contextString"] = issue["contextString"] + full_args["issue_type"] = issue["type"] + + if extra_event: + extra_event = f"INNER JOIN ({extra_event}) AS extra_event USING(session_id)" + # extra_join = f"""INNER JOIN {extra_event} AS ev USING(session_id)""" + # extra_constraints.append("ev.timestamp>=%(startDate)s") + # extra_constraints.append("ev.timestamp<=%(endDate)s") + else: + extra_event = "" + if errors_only: + query_part = f"""{f"({events_query_part}) AS f" if len(events_query_part) > 0 else ""}""" + else: + if len(events_query_part) > 0: + extra_join += f"""INNER JOIN (SELECT * + FROM {MAIN_SESSIONS_TABLE} AS s {extra_event} + WHERE {" AND ".join(extra_constraints)}) AS s ON(s.session_id=f.session_id)""" + else: + extra_join += f"""(SELECT * + FROM {MAIN_SESSIONS_TABLE} AS s {extra_event} + WHERE {" AND ".join(extra_constraints)}) AS s""" + query_part = f"""\ + FROM {f"({events_query_part}) AS f" if len(events_query_part) > 0 else ""} + {extra_join} + {extra_from} + """ + return full_args, query_part + + +def search_by_metadata(tenant_id, user_id, m_key, m_value, project_id=None): + if project_id is None: + all_projects = projects.get_projects(tenant_id=tenant_id, recording_state=False) 
+ else: + all_projects = [ + projects.get_project(tenant_id=tenant_id, project_id=int(project_id), include_last_session=False, + include_gdpr=False)] + + all_projects = {int(p["projectId"]): p["name"] for p in all_projects} + project_ids = list(all_projects.keys()) + + available_keys = metadata.get_keys_by_projects(project_ids) + for i in available_keys: + available_keys[i]["user_id"] = schemas.FilterType.user_id + available_keys[i]["user_anonymous_id"] = schemas.FilterType.user_anonymous_id + results = {} + for i in project_ids: + if m_key not in available_keys[i].values(): + available_keys.pop(i) + results[i] = {"total": 0, "sessions": [], "missingMetadata": True} + project_ids = list(available_keys.keys()) + if len(project_ids) > 0: + with pg_client.PostgresClient() as cur: + sub_queries = [] + for i in project_ids: + col_name = list(available_keys[i].keys())[list(available_keys[i].values()).index(m_key)] + sub_queries.append(cur.mogrify( + f"(SELECT COALESCE(COUNT(s.*)) AS count FROM public.sessions AS s WHERE s.project_id = %(id)s AND s.{col_name} = %(value)s) AS \"{i}\"", + {"id": i, "value": m_value}).decode('UTF-8')) + query = f"""SELECT {", ".join(sub_queries)};""" + cur.execute(query=query) + + rows = cur.fetchone() + + sub_queries = [] + for i in rows.keys(): + results[i] = {"total": rows[i], "sessions": [], "missingMetadata": False, "name": all_projects[int(i)]} + if rows[i] > 0: + col_name = list(available_keys[int(i)].keys())[list(available_keys[int(i)].values()).index(m_key)] + sub_queries.append( + cur.mogrify( + f"""( + SELECT * + FROM ( + SELECT DISTINCT ON(favorite_sessions.session_id, s.session_id) {SESSION_PROJECTION_COLS} + FROM public.sessions AS s LEFT JOIN (SELECT session_id + FROM public.user_favorite_sessions + WHERE user_favorite_sessions.user_id = %(userId)s + ) AS favorite_sessions USING (session_id) + WHERE s.project_id = %(id)s AND s.duration IS NOT NULL AND s.{col_name} = %(value)s + ) AS full_sessions + ORDER BY favorite DESC, 
issue_score DESC + LIMIT 10 + )""", + {"id": i, "value": m_value, "userId": user_id}).decode('UTF-8')) + if len(sub_queries) > 0: + cur.execute("\nUNION\n".join(sub_queries)) + rows = cur.fetchall() + for i in rows: + results[str(i["project_id"])]["sessions"].append(helper.dict_to_camel_case(i)) + return results + + +def search_by_issue(user_id, issue, project_id, start_date, end_date): + constraints = ["s.project_id = %(projectId)s", + "p_issues.context_string = %(issueContextString)s", + "p_issues.type = %(issueType)s"] + if start_date is not None: + constraints.append("start_ts >= %(startDate)s") + if end_date is not None: + constraints.append("start_ts <= %(endDate)s") + with pg_client.PostgresClient() as cur: + cur.execute( + cur.mogrify( + f"""SELECT DISTINCT ON(favorite_sessions.session_id, s.session_id) {SESSION_PROJECTION_COLS} + FROM public.sessions AS s + INNER JOIN events_common.issues USING (session_id) + INNER JOIN public.issues AS p_issues USING (issue_id) + LEFT JOIN (SELECT user_id, session_id + FROM public.user_favorite_sessions + WHERE user_id = %(userId)s) AS favorite_sessions + USING (session_id) + WHERE {" AND ".join(constraints)} + ORDER BY s.session_id DESC;""", + { + "issueContextString": issue["contextString"], + "issueType": issue["type"], "userId": user_id, + "projectId": project_id, + "startDate": start_date, + "endDate": end_date + })) + + rows = cur.fetchall() + return helper.list_to_camel_case(rows) + + +def get_user_sessions(project_id, user_id, start_date, end_date): + with pg_client.PostgresClient() as cur: + constraints = ["s.project_id = %(projectId)s", "s.user_id = %(userId)s"] + if start_date is not None: + constraints.append("s.start_ts >= %(startDate)s") + if end_date is not None: + constraints.append("s.start_ts <= %(endDate)s") + + query_part = f"""\ + FROM public.sessions AS s + WHERE {" AND ".join(constraints)}""" + + cur.execute(cur.mogrify(f"""\ + SELECT s.project_id, + s.session_id::text AS session_id, + s.user_uuid, 
+ s.user_id, + s.user_os, + s.user_browser, + s.user_device, + s.user_country, + s.start_ts, + s.duration, + s.events_count, + s.pages_count, + s.errors_count + {query_part} + ORDER BY s.session_id + LIMIT 50;""", { + "projectId": project_id, + "userId": user_id, + "startDate": start_date, + "endDate": end_date + })) + + sessions = cur.fetchall() + return helper.list_to_camel_case(sessions) + + +def get_session_user(project_id, user_id): + with pg_client.PostgresClient() as cur: + query = cur.mogrify( + """\ + SELECT + user_id, + count(*) as session_count, + max(start_ts) as last_seen, + min(start_ts) as first_seen + FROM + "public".sessions + WHERE + project_id = %(project_id)s + AND user_id = %(userId)s + AND duration is not null + GROUP BY user_id; + """, + {"project_id": project_id, "userId": user_id} + ) + cur.execute(query=query) + data = cur.fetchone() + return helper.dict_to_camel_case(data) + + +def get_session_ids_by_user_ids(project_id, user_ids): + with pg_client.PostgresClient() as cur: + query = cur.mogrify( + """\ + SELECT session_id FROM public.sessions + WHERE + project_id = %(project_id)s AND user_id IN %(userId)s;""", + {"project_id": project_id, "userId": tuple(user_ids)} + ) + cur.execute(query=query) + return cur.fetchall() + + +def delete_sessions_by_session_ids(session_ids): + with pg_client.PostgresClient(unlimited_query=True) as cur: + query = cur.mogrify( + """\ + DELETE FROM public.sessions + WHERE + session_id IN %(session_ids)s;""", + {"session_ids": tuple(session_ids)} + ) + cur.execute(query=query) + + return True + + +def delete_sessions_by_user_ids(project_id, user_ids): + with pg_client.PostgresClient(unlimited_query=True) as cur: + query = cur.mogrify( + """\ + DELETE FROM public.sessions + WHERE + project_id = %(project_id)s AND user_id IN %(userId)s;""", + {"project_id": project_id, "userId": tuple(user_ids)} + ) + cur.execute(query=query) + + return True + + +def count_all(): + with pg_client.PostgresClient(unlimited_query=True) as 
cur: + cur.execute(query="SELECT COUNT(session_id) AS count FROM public.sessions") + return cur.fetchone().get("count", 0) diff --git a/ee/api/chalicelib/core/sessions_favorite_viewed.py b/ee/api/chalicelib/core/sessions_favorite.py similarity index 71% rename from ee/api/chalicelib/core/sessions_favorite_viewed.py rename to ee/api/chalicelib/core/sessions_favorite.py index 896ba4a99..c5487376d 100644 --- a/ee/api/chalicelib/core/sessions_favorite_viewed.py +++ b/ee/api/chalicelib/core/sessions_favorite.py @@ -1,18 +1,19 @@ -from chalicelib.core import sessions -from chalicelib.utils import pg_client, s3_extra from decouple import config +from chalicelib.core import sessions, sessions_favorite_exp +from chalicelib.utils import pg_client, s3_extra + def add_favorite_session(project_id, user_id, session_id): with pg_client.PostgresClient() as cur: cur.execute( cur.mogrify(f"""\ - INSERT INTO public.user_favorite_sessions - (user_id, session_id) - VALUES - (%(userId)s,%(sessionId)s);""", + INSERT INTO public.user_favorite_sessions(user_id, session_id) + VALUES (%(userId)s,%(sessionId)s);""", {"userId": user_id, "sessionId": session_id}) ) + + sessions_favorite_exp.add_favorite_session(project_id=project_id, user_id=user_id, session_id=session_id) return sessions.get_by_id2_pg(project_id=project_id, session_id=session_id, user_id=user_id, full_data=False, include_fav_viewed=True) @@ -22,28 +23,15 @@ def remove_favorite_session(project_id, user_id, session_id): cur.execute( cur.mogrify(f"""\ DELETE FROM public.user_favorite_sessions - WHERE - user_id = %(userId)s + WHERE user_id = %(userId)s AND session_id = %(sessionId)s;""", {"userId": user_id, "sessionId": session_id}) ) + sessions_favorite_exp.remove_favorite_session(project_id=project_id, user_id=user_id, session_id=session_id) return sessions.get_by_id2_pg(project_id=project_id, session_id=session_id, user_id=user_id, full_data=False, include_fav_viewed=True) -def add_viewed_session(project_id, user_id, session_id): - 
with pg_client.PostgresClient() as cur: - cur.execute( - cur.mogrify("""\ - INSERT INTO public.user_viewed_sessions - (user_id, session_id) - VALUES - (%(userId)s,%(sessionId)s) - ON CONFLICT DO NOTHING;""", - {"userId": user_id, "sessionId": session_id}) - ) - - def favorite_session(project_id, user_id, session_id): if favorite_session_exists(user_id=user_id, session_id=session_id): key = str(session_id) @@ -74,16 +62,11 @@ def favorite_session(project_id, user_id, session_id): return add_favorite_session(project_id=project_id, user_id=user_id, session_id=session_id) -def view_session(project_id, user_id, session_id): - return add_viewed_session(project_id=project_id, user_id=user_id, session_id=session_id) - - def favorite_session_exists(user_id, session_id): with pg_client.PostgresClient() as cur: cur.execute( cur.mogrify( - """SELECT - session_id + """SELECT session_id FROM public.user_favorite_sessions WHERE user_id = %(userId)s @@ -92,3 +75,18 @@ def favorite_session_exists(user_id, session_id): ) r = cur.fetchone() return r is not None + + +def get_start_end_timestamp(project_id, user_id): + with pg_client.PostgresClient() as cur: + cur.execute( + cur.mogrify( + """SELECT max(start_ts) AS max_start_ts, min(start_ts) AS min_start_ts + FROM public.user_favorite_sessions INNER JOIN sessions USING(session_id) + WHERE + user_favorite_sessions.user_id = %(userId)s + AND project_id = %(project_id)s;""", + {"userId": user_id, "project_id": project_id}) + ) + r = cur.fetchone() + return (0, 0) if r is None else (r["max_start_ts"], r["min_start_ts"]) diff --git a/ee/api/chalicelib/core/sessions_favorite_exp.py b/ee/api/chalicelib/core/sessions_favorite_exp.py new file mode 100644 index 000000000..6ee8654b0 --- /dev/null +++ b/ee/api/chalicelib/core/sessions_favorite_exp.py @@ -0,0 +1,24 @@ +import logging + +from decouple import config + +from chalicelib.utils import ch_client, exp_ch_helper + +logging.basicConfig(level=config("LOGLEVEL", default=logging.INFO)) + + +def 
add_favorite_session(project_id, user_id, session_id, sign=1): + try: + with ch_client.ClickHouseClient() as cur: + query = f"""INSERT INTO {exp_ch_helper.get_user_favorite_sessions_table()}(project_id,user_id, session_id, sign) + VALUES (%(project_id)s,%(userId)s,%(sessionId)s,%(sign)s);""" + params = {"userId": user_id, "sessionId": session_id, "project_id": project_id, "sign": sign} + cur.execute(query=query, params=params) + + except Exception as err: + logging.error("------- Exception while adding favorite session to CH") + logging.error(err) + + +def remove_favorite_session(project_id, user_id, session_id): + add_favorite_session(project_id=project_id, user_id=user_id, session_id=session_id, sign=-1) diff --git a/ee/api/chalicelib/core/sessions_viewed.py b/ee/api/chalicelib/core/sessions_viewed.py new file mode 100644 index 000000000..59bb55c75 --- /dev/null +++ b/ee/api/chalicelib/core/sessions_viewed.py @@ -0,0 +1,13 @@ +from chalicelib.core import sessions_viewed_exp +from chalicelib.utils import pg_client + + +def view_session(project_id, user_id, session_id): + with pg_client.PostgresClient() as cur: + cur.execute( + cur.mogrify("""INSERT INTO public.user_viewed_sessions (user_id, session_id) + VALUES (%(userId)s,%(sessionId)s) + ON CONFLICT DO NOTHING;""", + {"userId": user_id, "sessionId": session_id}) + ) + sessions_viewed_exp.view_session(project_id=project_id, user_id=user_id, session_id=session_id) diff --git a/ee/api/chalicelib/core/sessions_viewed_exp.py b/ee/api/chalicelib/core/sessions_viewed_exp.py new file mode 100644 index 000000000..3b26612cb --- /dev/null +++ b/ee/api/chalicelib/core/sessions_viewed_exp.py @@ -0,0 +1,17 @@ +from chalicelib.utils import ch_client, exp_ch_helper +import logging +from decouple import config + +logging.basicConfig(level=config("LOGLEVEL", default=logging.INFO)) + + +def view_session(project_id, user_id, session_id): + try: + with ch_client.ClickHouseClient() as cur: + query = f"""INSERT INTO 
{exp_ch_helper.get_user_viewed_sessions_table()}(project_id, user_id, session_id) + VALUES (%(project_id)s,%(userId)s,%(sessionId)s);""" + params = {"userId": user_id, "sessionId": session_id, "project_id": project_id} + cur.execute(query=query, params=params) + except Exception as err: + logging.error("------- Exception while adding viewed session to CH") + logging.error(err) diff --git a/ee/api/chalicelib/core/signup.py b/ee/api/chalicelib/core/signup.py index b8b0a3e4a..72317859f 100644 --- a/ee/api/chalicelib/core/signup.py +++ b/ee/api/chalicelib/core/signup.py @@ -43,7 +43,7 @@ def create_step1(data: schemas.UserSignupSchema): print("Verifying company's name validity") company_name = data.organizationName - if company_name is None or len(company_name) < 1 or not helper.is_alphanumeric_space(company_name): + if company_name is None or len(company_name) < 1: errors.append("invalid organization's name") print("Verifying project's name validity") diff --git a/ee/api/chalicelib/core/users.py b/ee/api/chalicelib/core/users.py index ff43cca41..9201350e9 100644 --- a/ee/api/chalicelib/core/users.py +++ b/ee/api/chalicelib/core/users.py @@ -212,7 +212,7 @@ def create_member(tenant_id, user_id, data, background_tasks: BackgroundTasks): if user: return {"errors": ["user already exists"]} name = data.get("name", None) - if name is not None and not helper.is_alphabet_latin_space(name): + if name is not None and len(name) == 0: return {"errors": ["invalid user name"]} if name is None: name = data["email"] diff --git a/ee/api/chalicelib/utils/ch_client.py b/ee/api/chalicelib/utils/ch_client.py index a51230a19..1c9d0b01d 100644 --- a/ee/api/chalicelib/utils/ch_client.py +++ b/ee/api/chalicelib/utils/ch_client.py @@ -1,6 +1,19 @@ +import logging + import clickhouse_driver from decouple import config +logging.basicConfig(level=config("LOGLEVEL", default=logging.INFO)) + +settings = {} +if config('ch_timeout', cast=int, default=-1) > 0: + logging.info(f"CH-max_execution_time 
set to {config('ch_timeout')}s") + settings = {**settings, "max_execution_time": config('ch_timeout', cast=int)} + +if config('ch_receive_timeout', cast=int, default=-1) > 0: + logging.info(f"CH-receive_timeout set to {config('ch_receive_timeout')}s") + settings = {**settings, "receive_timeout": config('ch_receive_timeout', cast=int)} + class ClickHouseClient: __client = None @@ -8,16 +21,23 @@ class ClickHouseClient: def __init__(self): self.__client = clickhouse_driver.Client(host=config("ch_host"), database="default", - port=config("ch_port", cast=int)) \ + port=config("ch_port", cast=int), + settings=settings) \ if self.__client is None else self.__client def __enter__(self): return self def execute(self, query, params=None, **args): - results = self.__client.execute(query=query, params=params, with_column_types=True, **args) - keys = tuple(x for x, y in results[1]) - return [dict(zip(keys, i)) for i in results[0]] + try: + results = self.__client.execute(query=query, params=params, with_column_types=True, **args) + keys = tuple(x for x, y in results[1]) + return [dict(zip(keys, i)) for i in results[0]] + except Exception as err: + logging.error("--------- CH QUERY EXCEPTION -----------") + logging.error(self.format(query=query, params=params)) + logging.error("--------------------") + raise err def insert(self, query, params=None, **args): return self.__client.execute(query=query, params=params, **args) diff --git a/ee/api/chalicelib/utils/exp_ch_helper.py b/ee/api/chalicelib/utils/exp_ch_helper.py new file mode 100644 index 000000000..fb6781184 --- /dev/null +++ b/ee/api/chalicelib/utils/exp_ch_helper.py @@ -0,0 +1,42 @@ +from chalicelib.utils.TimeUTC import TimeUTC +from decouple import config +import logging + +logging.basicConfig(level=config("LOGLEVEL", default=logging.INFO)) + +if config("EXP_7D_MV", cast=bool, default=True): + print(">>> Using experimental last 7 days materialized views") + + +def get_main_events_table(timestamp): + return 
"experimental.events_l7d_mv" \ + if config("EXP_7D_MV", cast=bool, default=True) \ + and timestamp >= TimeUTC.now(delta_days=-7) else "experimental.events" + + +def get_main_sessions_table(timestamp): + return "experimental.sessions_l7d_mv" \ + if config("EXP_7D_MV", cast=bool, default=True) \ + and timestamp >= TimeUTC.now(delta_days=-7) else "experimental.sessions" + + +def get_main_resources_table(timestamp): + return "experimental.resources_l7s_mv" \ + if config("EXP_7D_MV", cast=bool, default=True) \ + and timestamp >= TimeUTC.now(delta_days=-7) else "experimental.resources" + + +def get_autocomplete_table(timestamp=0): + return "experimental.autocomplete" + + +def get_user_favorite_sessions_table(timestamp=0): + return "experimental.user_favorite_sessions" + + +def get_user_viewed_sessions_table(timestamp=0): + return "experimental.user_viewed_sessions" + + +def get_user_viewed_errors_table(timestamp=0): + return "experimental.user_viewed_errors" diff --git a/ee/api/clean.sh b/ee/api/clean.sh index fa1ab8cb5..0974875cf 100755 --- a/ee/api/clean.sh +++ b/ee/api/clean.sh @@ -3,8 +3,10 @@ rm -rf ./chalicelib/core/alerts.py rm -rf ./chalicelib/core/alerts_processor.py rm -rf ./chalicelib/core/announcements.py +rm -rf ./chalicelib/core/autocomplete.py rm -rf ./chalicelib/core/collaboration_slack.py -rm -rf ./chalicelib/core/errors_favorite_viewed.py +rm -rf ./chalicelib/core/errors.py +rm -rf ./chalicelib/core/errors_favorite.py rm -rf ./chalicelib/core/events.py rm -rf ./chalicelib/core/events_ios.py rm -rf ./chalicelib/core/dashboards.py diff --git a/ee/api/env.default b/ee/api/env.default index 41d9b6b45..13976e436 100644 --- a/ee/api/env.default +++ b/ee/api/env.default @@ -21,6 +21,8 @@ captcha_key= captcha_server= ch_host= ch_port= +ch_timeout=30 +ch_receive_timeout=10 change_password_link=/reset-password?invitation=%s&&pass=%s email_basic=http://127.0.0.1:8000/async/basic/%s email_plans=http://127.0.0.1:8000/async/plans/%s @@ -58,4 +60,9 @@ 
sourcemaps_bucket=sourcemaps sourcemaps_reader=http://127.0.0.1:9000/sourcemaps stage=default-ee version_number=1.0.0 -FS_DIR=/mnt/efs \ No newline at end of file +FS_DIR=/mnt/efs +EXP_SESSIONS_SEARCH=true +EXP_AUTOCOMPLETE=true +EXP_ERRORS_SEARCH=true +EXP_METRICS=true +EXP_7D_MV=true \ No newline at end of file diff --git a/ee/api/requirements-alerts.txt b/ee/api/requirements-alerts.txt index 906189999..475a39b5e 100644 --- a/ee/api/requirements-alerts.txt +++ b/ee/api/requirements-alerts.txt @@ -1,17 +1,17 @@ requests==2.28.1 urllib3==1.26.10 -boto3==1.24.26 +boto3==1.24.53 pyjwt==2.4.0 psycopg2-binary==2.9.3 -elasticsearch==8.3.1 +elasticsearch==8.3.3 jira==3.3.1 -fastapi==0.78.0 +fastapi==0.80.0 uvicorn[standard]==0.18.2 python-decouple==3.6 -pydantic[email]==1.9.1 +pydantic[email]==1.9.2 apscheduler==3.9.1 clickhouse-driver==0.2.4 diff --git a/ee/api/requirements-crons.txt b/ee/api/requirements-crons.txt index 906189999..475a39b5e 100644 --- a/ee/api/requirements-crons.txt +++ b/ee/api/requirements-crons.txt @@ -1,17 +1,17 @@ requests==2.28.1 urllib3==1.26.10 -boto3==1.24.26 +boto3==1.24.53 pyjwt==2.4.0 psycopg2-binary==2.9.3 -elasticsearch==8.3.1 +elasticsearch==8.3.3 jira==3.3.1 -fastapi==0.78.0 +fastapi==0.80.0 uvicorn[standard]==0.18.2 python-decouple==3.6 -pydantic[email]==1.9.1 +pydantic[email]==1.9.2 apscheduler==3.9.1 clickhouse-driver==0.2.4 diff --git a/ee/api/requirements.txt b/ee/api/requirements.txt index 0a8ca819e..bdf363b7b 100644 --- a/ee/api/requirements.txt +++ b/ee/api/requirements.txt @@ -1,17 +1,17 @@ requests==2.28.1 urllib3==1.26.10 -boto3==1.24.26 +boto3==1.24.53 pyjwt==2.4.0 psycopg2-binary==2.9.3 -elasticsearch==8.3.1 +elasticsearch==8.3.3 jira==3.3.1 -fastapi==0.78.0 +fastapi==0.80.0 uvicorn[standard]==0.18.2 python-decouple==3.6 -pydantic[email]==1.9.1 +pydantic[email]==1.9.2 apscheduler==3.9.1 clickhouse-driver==0.2.4 diff --git a/ee/api/schemas_ee.py b/ee/api/schemas_ee.py index 0375521ad..458bdc052 100644 --- 
a/ee/api/schemas_ee.py +++ b/ee/api/schemas_ee.py @@ -43,3 +43,27 @@ class TrailSearchPayloadSchema(schemas._PaginatedSchema): class Config: alias_generator = schemas.attribute_to_camel_case + + +class SessionModel(BaseModel): + viewed: bool = Field(default=False) + userId: Optional[str] + userOs: str + duration: int + favorite: bool = Field(default=False) + platform: str + startTs: int + userUuid: str + projectId: int + sessionId: str + issueScore: int + issueTypes: List[schemas.IssueType] = Field(default=[]) + pagesCount: int + userDevice: Optional[str] + errorsCount: int + eventsCount: int + userBrowser: str + userCountry: str + userDeviceType: str + userAnonymousId: Optional[str] + metadata: dict = Field(default={}) diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_1_database.sql b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_1_database.sql new file mode 100644 index 000000000..20f7016b3 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_1_database.sql @@ -0,0 +1 @@ +CREATE DATABASE IF NOT EXISTS experimental; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_autocomplete.sql b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_autocomplete.sql new file mode 100644 index 000000000..a6d3dd21f --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_autocomplete.sql @@ -0,0 +1,10 @@ +CREATE TABLE IF NOT EXISTS experimental.autocomplete +( + project_id UInt16, + type LowCardinality(String), + value String, + _timestamp DateTime DEFAULT now() +) ENGINE = ReplacingMergeTree(_timestamp) + PARTITION BY toYYYYMM(_timestamp) + ORDER BY (project_id, type, value) + TTL _timestamp + INTERVAL 1 MONTH; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_events.sql b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_events.sql new file mode 100644 index 000000000..726415f3a --- /dev/null +++ 
b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_events.sql @@ -0,0 +1,72 @@ +CREATE TABLE IF NOT EXISTS experimental.events +( + session_id UInt64, + project_id UInt16, + event_type Enum8('CLICK'=0, 'INPUT'=1, 'LOCATION'=2,'REQUEST'=3,'PERFORMANCE'=4,'LONGTASK'=5,'ERROR'=6,'CUSTOM'=7, 'GRAPHQL'=8, 'STATEACTION'=9), + datetime DateTime, + label Nullable(String), + hesitation_time Nullable(UInt32), + name Nullable(String), + payload Nullable(String), + level Nullable(Enum8('info'=0, 'error'=1)) DEFAULT if(event_type == 'CUSTOM', 'info', null), + source Nullable(Enum8('js_exception'=0, 'bugsnag'=1, 'cloudwatch'=2, 'datadog'=3, 'elasticsearch'=4, 'newrelic'=5, 'rollbar'=6, 'sentry'=7, 'stackdriver'=8, 'sumologic'=9)), + message Nullable(String), + error_id Nullable(String), + duration Nullable(UInt16), + context Nullable(Enum8('unknown'=0, 'self'=1, 'same-origin-ancestor'=2, 'same-origin-descendant'=3, 'same-origin'=4, 'cross-origin-ancestor'=5, 'cross-origin-descendant'=6, 'cross-origin-unreachable'=7, 'multiple-contexts'=8)), + container_type Nullable(Enum8('window'=0, 'iframe'=1, 'embed'=2, 'object'=3)), + container_id Nullable(String), + container_name Nullable(String), + container_src Nullable(String), + url Nullable(String), + url_host Nullable(String) MATERIALIZED lower(domain(url)), + url_path Nullable(String) MATERIALIZED lower(pathFull(url)), + url_hostpath Nullable(String) MATERIALIZED concat(url_host, url_path), + request_start Nullable(UInt16), + response_start Nullable(UInt16), + response_end Nullable(UInt16), + dom_content_loaded_event_start Nullable(UInt16), + dom_content_loaded_event_end Nullable(UInt16), + load_event_start Nullable(UInt16), + load_event_end Nullable(UInt16), + first_paint Nullable(UInt16), + first_contentful_paint_time Nullable(UInt16), + speed_index Nullable(UInt16), + visually_complete Nullable(UInt16), + time_to_interactive Nullable(UInt16), + ttfb Nullable(UInt16) MATERIALIZED if(greaterOrEquals(response_start, 
request_start), + minus(response_start, request_start), Null), + ttlb Nullable(UInt16) MATERIALIZED if(greaterOrEquals(response_end, request_start), + minus(response_end, request_start), Null), + response_time Nullable(UInt16) MATERIALIZED if(greaterOrEquals(response_end, response_start), + minus(response_end, response_start), Null), + dom_building_time Nullable(UInt16) MATERIALIZED if( + greaterOrEquals(dom_content_loaded_event_start, response_end), + minus(dom_content_loaded_event_start, response_end), Null), + dom_content_loaded_event_time Nullable(UInt16) MATERIALIZED if( + greaterOrEquals(dom_content_loaded_event_end, dom_content_loaded_event_start), + minus(dom_content_loaded_event_end, dom_content_loaded_event_start), Null), + load_event_time Nullable(UInt16) MATERIALIZED if(greaterOrEquals(load_event_end, load_event_start), + minus(load_event_end, load_event_start), Null), + min_fps Nullable(UInt8), + avg_fps Nullable(UInt8), + max_fps Nullable(UInt8), + min_cpu Nullable(UInt8), + avg_cpu Nullable(UInt8), + max_cpu Nullable(UInt8), + min_total_js_heap_size Nullable(UInt64), + avg_total_js_heap_size Nullable(UInt64), + max_total_js_heap_size Nullable(UInt64), + min_used_js_heap_size Nullable(UInt64), + avg_used_js_heap_size Nullable(UInt64), + max_used_js_heap_size Nullable(UInt64), + method Nullable(Enum8('GET' = 0, 'HEAD' = 1, 'POST' = 2, 'PUT' = 3, 'DELETE' = 4, 'CONNECT' = 5, 'OPTIONS' = 6, 'TRACE' = 7, 'PATCH' = 8)), + status Nullable(UInt16), + success Nullable(UInt8), + request_body Nullable(String), + response_body Nullable(String), + _timestamp DateTime DEFAULT now() +) ENGINE = MergeTree + PARTITION BY toYYYYMM(datetime) + ORDER BY (project_id, datetime, event_type, session_id) + TTL datetime + INTERVAL 3 MONTH; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_resources.sql b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_resources.sql new file mode 100644 index 000000000..c6930df67 --- /dev/null +++ 
b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_resources.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS experimental.resources +( + session_id UInt64, + project_id UInt16, + datetime DateTime, + url String, + url_host String MATERIALIZED lower(domain(url)), + url_path String MATERIALIZED lower(path(url)), + url_hostpath String MATERIALIZED concat(url_host, url_path), + type Enum8('other'=-1, 'script'=0, 'stylesheet'=1, 'fetch'=2, 'img'=3, 'media'=4), + name Nullable(String) MATERIALIZED if(type = 'fetch', null, + coalesce(nullIf(splitByChar('/', url_path)[-1], ''), + nullIf(splitByChar('/', url_path)[-2], ''))), + duration Nullable(UInt16), + ttfb Nullable(UInt16), + header_size Nullable(UInt16), + encoded_body_size Nullable(UInt32), + decoded_body_size Nullable(UInt32), + compression_ratio Nullable(Float32) MATERIALIZED divide(decoded_body_size, encoded_body_size), + success Nullable(UInt8) COMMENT 'currently available for type=img only', + _timestamp DateTime DEFAULT now() +) ENGINE = MergeTree + PARTITION BY toYYYYMM(datetime) + ORDER BY (project_id, datetime, type, session_id) + TTL datetime + INTERVAL 3 MONTH; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_sessions.sql b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_sessions.sql new file mode 100644 index 000000000..3ef4156f2 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_sessions.sql @@ -0,0 +1,42 @@ +CREATE TABLE IF NOT EXISTS experimental.sessions +( + session_id UInt64, + project_id UInt16, + tracker_version LowCardinality(String), + rev_id LowCardinality(Nullable(String)), + user_uuid UUID, + user_os LowCardinality(String), + user_os_version LowCardinality(Nullable(String)), + user_browser LowCardinality(String), + user_browser_version LowCardinality(Nullable(String)), + user_device Nullable(String), + user_device_type Enum8('other'=0, 'desktop'=1, 'mobile'=2), + user_country Enum8('UN'=-128, 'RW'=-127, 'SO'=-126, 'YE'=-125, 
'IQ'=-124, 'SA'=-123, 'IR'=-122, 'CY'=-121, 'TZ'=-120, 'SY'=-119, 'AM'=-118, 'KE'=-117, 'CD'=-116, 'DJ'=-115, 'UG'=-114, 'CF'=-113, 'SC'=-112, 'JO'=-111, 'LB'=-110, 'KW'=-109, 'OM'=-108, 'QA'=-107, 'BH'=-106, 'AE'=-105, 'IL'=-104, 'TR'=-103, 'ET'=-102, 'ER'=-101, 'EG'=-100, 'SD'=-99, 'GR'=-98, 'BI'=-97, 'EE'=-96, 'LV'=-95, 'AZ'=-94, 'LT'=-93, 'SJ'=-92, 'GE'=-91, 'MD'=-90, 'BY'=-89, 'FI'=-88, 'AX'=-87, 'UA'=-86, 'MK'=-85, 'HU'=-84, 'BG'=-83, 'AL'=-82, 'PL'=-81, 'RO'=-80, 'XK'=-79, 'ZW'=-78, 'ZM'=-77, 'KM'=-76, 'MW'=-75, 'LS'=-74, 'BW'=-73, 'MU'=-72, 'SZ'=-71, 'RE'=-70, 'ZA'=-69, 'YT'=-68, 'MZ'=-67, 'MG'=-66, 'AF'=-65, 'PK'=-64, 'BD'=-63, 'TM'=-62, 'TJ'=-61, 'LK'=-60, 'BT'=-59, 'IN'=-58, 'MV'=-57, 'IO'=-56, 'NP'=-55, 'MM'=-54, 'UZ'=-53, 'KZ'=-52, 'KG'=-51, 'TF'=-50, 'HM'=-49, 'CC'=-48, 'PW'=-47, 'VN'=-46, 'TH'=-45, 'ID'=-44, 'LA'=-43, 'TW'=-42, 'PH'=-41, 'MY'=-40, 'CN'=-39, 'HK'=-38, 'BN'=-37, 'MO'=-36, 'KH'=-35, 'KR'=-34, 'JP'=-33, 'KP'=-32, 'SG'=-31, 'CK'=-30, 'TL'=-29, 'RU'=-28, 'MN'=-27, 'AU'=-26, 'CX'=-25, 'MH'=-24, 'FM'=-23, 'PG'=-22, 'SB'=-21, 'TV'=-20, 'NR'=-19, 'VU'=-18, 'NC'=-17, 'NF'=-16, 'NZ'=-15, 'FJ'=-14, 'LY'=-13, 'CM'=-12, 'SN'=-11, 'CG'=-10, 'PT'=-9, 'LR'=-8, 'CI'=-7, 'GH'=-6, 'GQ'=-5, 'NG'=-4, 'BF'=-3, 'TG'=-2, 'GW'=-1, 'MR'=0, 'BJ'=1, 'GA'=2, 'SL'=3, 'ST'=4, 'GI'=5, 'GM'=6, 'GN'=7, 'TD'=8, 'NE'=9, 'ML'=10, 'EH'=11, 'TN'=12, 'ES'=13, 'MA'=14, 'MT'=15, 'DZ'=16, 'FO'=17, 'DK'=18, 'IS'=19, 'GB'=20, 'CH'=21, 'SE'=22, 'NL'=23, 'AT'=24, 'BE'=25, 'DE'=26, 'LU'=27, 'IE'=28, 'MC'=29, 'FR'=30, 'AD'=31, 'LI'=32, 'JE'=33, 'IM'=34, 'GG'=35, 'SK'=36, 'CZ'=37, 'NO'=38, 'VA'=39, 'SM'=40, 'IT'=41, 'SI'=42, 'ME'=43, 'HR'=44, 'BA'=45, 'AO'=46, 'NA'=47, 'SH'=48, 'BV'=49, 'BB'=50, 'CV'=51, 'GY'=52, 'GF'=53, 'SR'=54, 'PM'=55, 'GL'=56, 'PY'=57, 'UY'=58, 'BR'=59, 'FK'=60, 'GS'=61, 'JM'=62, 'DO'=63, 'CU'=64, 'MQ'=65, 'BS'=66, 'BM'=67, 'AI'=68, 'TT'=69, 'KN'=70, 'DM'=71, 'AG'=72, 'LC'=73, 'TC'=74, 'AW'=75, 'VG'=76, 'VC'=77, 'MS'=78, 'MF'=79, 'BL'=80, 'GP'=81, 'GD'=82, 
'KY'=83, 'BZ'=84, 'SV'=85, 'GT'=86, 'HN'=87, 'NI'=88, 'CR'=89, 'VE'=90, 'EC'=91, 'CO'=92, 'PA'=93, 'HT'=94, 'AR'=95, 'CL'=96, 'BO'=97, 'PE'=98, 'MX'=99, 'PF'=100, 'PN'=101, 'KI'=102, 'TK'=103, 'TO'=104, 'WF'=105, 'WS'=106, 'NU'=107, 'MP'=108, 'GU'=109, 'PR'=110, 'VI'=111, 'UM'=112, 'AS'=113, 'CA'=114, 'US'=115, 'PS'=116, 'RS'=117, 'AQ'=118, 'SX'=119, 'CW'=120, 'BQ'=121, 'SS'=122), + datetime DateTime, + duration UInt32, + pages_count UInt16, + events_count UInt16, + errors_count UInt16, + utm_source Nullable(String), + utm_medium Nullable(String), + utm_campaign Nullable(String), + user_id Nullable(String), + metadata_1 Nullable(String), + metadata_2 Nullable(String), + metadata_3 Nullable(String), + metadata_4 Nullable(String), + metadata_5 Nullable(String), + metadata_6 Nullable(String), + metadata_7 Nullable(String), + metadata_8 Nullable(String), + metadata_9 Nullable(String), + metadata_10 Nullable(String), + issue_types Array(LowCardinality(String)), + referrer Nullable(String), + base_referrer Nullable(String), + _timestamp DateTime DEFAULT now() +) ENGINE = ReplacingMergeTree(_timestamp) + PARTITION BY toYYYYMMDD(datetime) + ORDER BY (project_id, datetime, session_id) + TTL datetime + INTERVAL 3 MONTH + SETTINGS index_granularity = 512; diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_user_favorite_sessions.sql b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_user_favorite_sessions.sql new file mode 100644 index 000000000..5a65dc790 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_user_favorite_sessions.sql @@ -0,0 +1,11 @@ +CREATE TABLE IF NOT EXISTS experimental.user_favorite_sessions +( + project_id UInt16, + user_id UInt32, + session_id UInt64, + _timestamp DateTime DEFAULT now(), + sign Int8 +) ENGINE = CollapsingMergeTree(sign) + PARTITION BY toYYYYMM(_timestamp) + ORDER BY (project_id, user_id, session_id) + TTL _timestamp + INTERVAL 3 MONTH; \ No newline at end of file diff --git 
a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_user_viewed_sessions.sql b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_user_viewed_sessions.sql new file mode 100644 index 000000000..ed1326399 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_user_viewed_sessions.sql @@ -0,0 +1,10 @@ +CREATE TABLE IF NOT EXISTS experimental.user_viewed_sessions +( + project_id UInt16, + user_id UInt32, + session_id UInt64, + _timestamp DateTime DEFAULT now() +) ENGINE = ReplacingMergeTree(_timestamp) + PARTITION BY toYYYYMM(_timestamp) + ORDER BY (project_id, user_id, session_id) + TTL _timestamp + INTERVAL 3 MONTH; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_user_viewer_errors.sql b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_user_viewer_errors.sql new file mode 100644 index 000000000..5fe21b8bb --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_2_user_viewer_errors.sql @@ -0,0 +1,10 @@ +CREATE TABLE IF NOT EXISTS experimental.user_viewed_errors +( + project_id UInt16, + user_id UInt32, + error_id String, + _timestamp DateTime DEFAULT now() +) ENGINE = ReplacingMergeTree(_timestamp) + PARTITION BY toYYYYMM(_timestamp) + ORDER BY (project_id, user_id, error_id) + TTL _timestamp + INTERVAL 3 MONTH; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_3_events_mv.sql b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_3_events_mv.sql new file mode 100644 index 000000000..136aea1c5 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_3_events_mv.sql @@ -0,0 +1,10 @@ +CREATE MATERIALIZED VIEW IF NOT EXISTS experimental.events_l7d_mv + ENGINE = MergeTree + PARTITION BY toYYYYMM(datetime) + ORDER BY (project_id, datetime, session_id) + TTL datetime + INTERVAL 7 DAY + POPULATE +AS +SELECT * +FROM experimental.events +WHERE datetime >= now() - INTERVAL 7 DAY; \ No newline at end of file diff --git 
a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_3_resources_mv.sql b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_3_resources_mv.sql new file mode 100644 index 000000000..a5a6fb403 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_3_resources_mv.sql @@ -0,0 +1,10 @@ +CREATE MATERIALIZED VIEW IF NOT EXISTS experimental.resources_l7d_mv + ENGINE = MergeTree + PARTITION BY toYYYYMM(datetime) + ORDER BY (project_id, datetime, session_id) + TTL datetime + INTERVAL 7 DAY + POPULATE +AS +SELECT * +FROM experimental.resources +WHERE datetime >= now() - INTERVAL 7 DAY; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_3_sessions_mv.sql b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_3_sessions_mv.sql new file mode 100644 index 000000000..eea689374 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/1.8.0/exp_3_sessions_mv.sql @@ -0,0 +1,13 @@ +CREATE MATERIALIZED VIEW IF NOT EXISTS experimental.sessions_l7d_mv + ENGINE = ReplacingMergeTree(_timestamp) + PARTITION BY toYYYYMMDD(datetime) + ORDER BY (project_id, datetime, session_id) + TTL datetime + INTERVAL 7 DAY + SETTINGS index_granularity = 512 + POPULATE +AS +SELECT * +FROM experimental.sessions +WHERE datetime >= now() - INTERVAL 7 DAY + AND isNotNull(duration) + AND duration > 0; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_1_database.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_1_database.sql new file mode 100644 index 000000000..20f7016b3 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_1_database.sql @@ -0,0 +1 @@ +CREATE DATABASE IF NOT EXISTS experimental; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_autocomplete.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_autocomplete.sql new file mode 100644 index 000000000..a6d3dd21f --- /dev/null +++ 
b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_autocomplete.sql @@ -0,0 +1,10 @@ +CREATE TABLE IF NOT EXISTS experimental.autocomplete +( + project_id UInt16, + type LowCardinality(String), + value String, + _timestamp DateTime DEFAULT now() +) ENGINE = ReplacingMergeTree(_timestamp) + PARTITION BY toYYYYMM(_timestamp) + ORDER BY (project_id, type, value) + TTL _timestamp + INTERVAL 1 MONTH; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_events.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_events.sql new file mode 100644 index 000000000..726415f3a --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_events.sql @@ -0,0 +1,72 @@ +CREATE TABLE IF NOT EXISTS experimental.events +( + session_id UInt64, + project_id UInt16, + event_type Enum8('CLICK'=0, 'INPUT'=1, 'LOCATION'=2,'REQUEST'=3,'PERFORMANCE'=4,'LONGTASK'=5,'ERROR'=6,'CUSTOM'=7, 'GRAPHQL'=8, 'STATEACTION'=9), + datetime DateTime, + label Nullable(String), + hesitation_time Nullable(UInt32), + name Nullable(String), + payload Nullable(String), + level Nullable(Enum8('info'=0, 'error'=1)) DEFAULT if(event_type == 'CUSTOM', 'info', null), + source Nullable(Enum8('js_exception'=0, 'bugsnag'=1, 'cloudwatch'=2, 'datadog'=3, 'elasticsearch'=4, 'newrelic'=5, 'rollbar'=6, 'sentry'=7, 'stackdriver'=8, 'sumologic'=9)), + message Nullable(String), + error_id Nullable(String), + duration Nullable(UInt16), + context Nullable(Enum8('unknown'=0, 'self'=1, 'same-origin-ancestor'=2, 'same-origin-descendant'=3, 'same-origin'=4, 'cross-origin-ancestor'=5, 'cross-origin-descendant'=6, 'cross-origin-unreachable'=7, 'multiple-contexts'=8)), + container_type Nullable(Enum8('window'=0, 'iframe'=1, 'embed'=2, 'object'=3)), + container_id Nullable(String), + container_name Nullable(String), + container_src Nullable(String), + url Nullable(String), + url_host Nullable(String) MATERIALIZED lower(domain(url)), + url_path Nullable(String) MATERIALIZED 
lower(pathFull(url)), + url_hostpath Nullable(String) MATERIALIZED concat(url_host, url_path), + request_start Nullable(UInt16), + response_start Nullable(UInt16), + response_end Nullable(UInt16), + dom_content_loaded_event_start Nullable(UInt16), + dom_content_loaded_event_end Nullable(UInt16), + load_event_start Nullable(UInt16), + load_event_end Nullable(UInt16), + first_paint Nullable(UInt16), + first_contentful_paint_time Nullable(UInt16), + speed_index Nullable(UInt16), + visually_complete Nullable(UInt16), + time_to_interactive Nullable(UInt16), + ttfb Nullable(UInt16) MATERIALIZED if(greaterOrEquals(response_start, request_start), + minus(response_start, request_start), Null), + ttlb Nullable(UInt16) MATERIALIZED if(greaterOrEquals(response_end, request_start), + minus(response_end, request_start), Null), + response_time Nullable(UInt16) MATERIALIZED if(greaterOrEquals(response_end, response_start), + minus(response_end, response_start), Null), + dom_building_time Nullable(UInt16) MATERIALIZED if( + greaterOrEquals(dom_content_loaded_event_start, response_end), + minus(dom_content_loaded_event_start, response_end), Null), + dom_content_loaded_event_time Nullable(UInt16) MATERIALIZED if( + greaterOrEquals(dom_content_loaded_event_end, dom_content_loaded_event_start), + minus(dom_content_loaded_event_end, dom_content_loaded_event_start), Null), + load_event_time Nullable(UInt16) MATERIALIZED if(greaterOrEquals(load_event_end, load_event_start), + minus(load_event_end, load_event_start), Null), + min_fps Nullable(UInt8), + avg_fps Nullable(UInt8), + max_fps Nullable(UInt8), + min_cpu Nullable(UInt8), + avg_cpu Nullable(UInt8), + max_cpu Nullable(UInt8), + min_total_js_heap_size Nullable(UInt64), + avg_total_js_heap_size Nullable(UInt64), + max_total_js_heap_size Nullable(UInt64), + min_used_js_heap_size Nullable(UInt64), + avg_used_js_heap_size Nullable(UInt64), + max_used_js_heap_size Nullable(UInt64), + method Nullable(Enum8('GET' = 0, 'HEAD' = 1, 'POST' = 
2, 'PUT' = 3, 'DELETE' = 4, 'CONNECT' = 5, 'OPTIONS' = 6, 'TRACE' = 7, 'PATCH' = 8)), + status Nullable(UInt16), + success Nullable(UInt8), + request_body Nullable(String), + response_body Nullable(String), + _timestamp DateTime DEFAULT now() +) ENGINE = MergeTree + PARTITION BY toYYYYMM(datetime) + ORDER BY (project_id, datetime, event_type, session_id) + TTL datetime + INTERVAL 3 MONTH; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_resources.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_resources.sql new file mode 100644 index 000000000..c6930df67 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_resources.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS experimental.resources +( + session_id UInt64, + project_id UInt16, + datetime DateTime, + url String, + url_host String MATERIALIZED lower(domain(url)), + url_path String MATERIALIZED lower(path(url)), + url_hostpath String MATERIALIZED concat(url_host, url_path), + type Enum8('other'=-1, 'script'=0, 'stylesheet'=1, 'fetch'=2, 'img'=3, 'media'=4), + name Nullable(String) MATERIALIZED if(type = 'fetch', null, + coalesce(nullIf(splitByChar('/', url_path)[-1], ''), + nullIf(splitByChar('/', url_path)[-2], ''))), + duration Nullable(UInt16), + ttfb Nullable(UInt16), + header_size Nullable(UInt16), + encoded_body_size Nullable(UInt32), + decoded_body_size Nullable(UInt32), + compression_ratio Nullable(Float32) MATERIALIZED divide(decoded_body_size, encoded_body_size), + success Nullable(UInt8) COMMENT 'currently available for type=img only', + _timestamp DateTime DEFAULT now() +) ENGINE = MergeTree + PARTITION BY toYYYYMM(datetime) + ORDER BY (project_id, datetime, type, session_id) + TTL datetime + INTERVAL 3 MONTH; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_sessions.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_sessions.sql new file mode 100644 index 000000000..3ef4156f2 --- 
/dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_sessions.sql @@ -0,0 +1,42 @@ +CREATE TABLE IF NOT EXISTS experimental.sessions +( + session_id UInt64, + project_id UInt16, + tracker_version LowCardinality(String), + rev_id LowCardinality(Nullable(String)), + user_uuid UUID, + user_os LowCardinality(String), + user_os_version LowCardinality(Nullable(String)), + user_browser LowCardinality(String), + user_browser_version LowCardinality(Nullable(String)), + user_device Nullable(String), + user_device_type Enum8('other'=0, 'desktop'=1, 'mobile'=2), + user_country Enum8('UN'=-128, 'RW'=-127, 'SO'=-126, 'YE'=-125, 'IQ'=-124, 'SA'=-123, 'IR'=-122, 'CY'=-121, 'TZ'=-120, 'SY'=-119, 'AM'=-118, 'KE'=-117, 'CD'=-116, 'DJ'=-115, 'UG'=-114, 'CF'=-113, 'SC'=-112, 'JO'=-111, 'LB'=-110, 'KW'=-109, 'OM'=-108, 'QA'=-107, 'BH'=-106, 'AE'=-105, 'IL'=-104, 'TR'=-103, 'ET'=-102, 'ER'=-101, 'EG'=-100, 'SD'=-99, 'GR'=-98, 'BI'=-97, 'EE'=-96, 'LV'=-95, 'AZ'=-94, 'LT'=-93, 'SJ'=-92, 'GE'=-91, 'MD'=-90, 'BY'=-89, 'FI'=-88, 'AX'=-87, 'UA'=-86, 'MK'=-85, 'HU'=-84, 'BG'=-83, 'AL'=-82, 'PL'=-81, 'RO'=-80, 'XK'=-79, 'ZW'=-78, 'ZM'=-77, 'KM'=-76, 'MW'=-75, 'LS'=-74, 'BW'=-73, 'MU'=-72, 'SZ'=-71, 'RE'=-70, 'ZA'=-69, 'YT'=-68, 'MZ'=-67, 'MG'=-66, 'AF'=-65, 'PK'=-64, 'BD'=-63, 'TM'=-62, 'TJ'=-61, 'LK'=-60, 'BT'=-59, 'IN'=-58, 'MV'=-57, 'IO'=-56, 'NP'=-55, 'MM'=-54, 'UZ'=-53, 'KZ'=-52, 'KG'=-51, 'TF'=-50, 'HM'=-49, 'CC'=-48, 'PW'=-47, 'VN'=-46, 'TH'=-45, 'ID'=-44, 'LA'=-43, 'TW'=-42, 'PH'=-41, 'MY'=-40, 'CN'=-39, 'HK'=-38, 'BN'=-37, 'MO'=-36, 'KH'=-35, 'KR'=-34, 'JP'=-33, 'KP'=-32, 'SG'=-31, 'CK'=-30, 'TL'=-29, 'RU'=-28, 'MN'=-27, 'AU'=-26, 'CX'=-25, 'MH'=-24, 'FM'=-23, 'PG'=-22, 'SB'=-21, 'TV'=-20, 'NR'=-19, 'VU'=-18, 'NC'=-17, 'NF'=-16, 'NZ'=-15, 'FJ'=-14, 'LY'=-13, 'CM'=-12, 'SN'=-11, 'CG'=-10, 'PT'=-9, 'LR'=-8, 'CI'=-7, 'GH'=-6, 'GQ'=-5, 'NG'=-4, 'BF'=-3, 'TG'=-2, 'GW'=-1, 'MR'=0, 'BJ'=1, 'GA'=2, 'SL'=3, 'ST'=4, 'GI'=5, 'GM'=6, 'GN'=7, 'TD'=8, 'NE'=9, 'ML'=10, 'EH'=11, 'TN'=12, 
'ES'=13, 'MA'=14, 'MT'=15, 'DZ'=16, 'FO'=17, 'DK'=18, 'IS'=19, 'GB'=20, 'CH'=21, 'SE'=22, 'NL'=23, 'AT'=24, 'BE'=25, 'DE'=26, 'LU'=27, 'IE'=28, 'MC'=29, 'FR'=30, 'AD'=31, 'LI'=32, 'JE'=33, 'IM'=34, 'GG'=35, 'SK'=36, 'CZ'=37, 'NO'=38, 'VA'=39, 'SM'=40, 'IT'=41, 'SI'=42, 'ME'=43, 'HR'=44, 'BA'=45, 'AO'=46, 'NA'=47, 'SH'=48, 'BV'=49, 'BB'=50, 'CV'=51, 'GY'=52, 'GF'=53, 'SR'=54, 'PM'=55, 'GL'=56, 'PY'=57, 'UY'=58, 'BR'=59, 'FK'=60, 'GS'=61, 'JM'=62, 'DO'=63, 'CU'=64, 'MQ'=65, 'BS'=66, 'BM'=67, 'AI'=68, 'TT'=69, 'KN'=70, 'DM'=71, 'AG'=72, 'LC'=73, 'TC'=74, 'AW'=75, 'VG'=76, 'VC'=77, 'MS'=78, 'MF'=79, 'BL'=80, 'GP'=81, 'GD'=82, 'KY'=83, 'BZ'=84, 'SV'=85, 'GT'=86, 'HN'=87, 'NI'=88, 'CR'=89, 'VE'=90, 'EC'=91, 'CO'=92, 'PA'=93, 'HT'=94, 'AR'=95, 'CL'=96, 'BO'=97, 'PE'=98, 'MX'=99, 'PF'=100, 'PN'=101, 'KI'=102, 'TK'=103, 'TO'=104, 'WF'=105, 'WS'=106, 'NU'=107, 'MP'=108, 'GU'=109, 'PR'=110, 'VI'=111, 'UM'=112, 'AS'=113, 'CA'=114, 'US'=115, 'PS'=116, 'RS'=117, 'AQ'=118, 'SX'=119, 'CW'=120, 'BQ'=121, 'SS'=122), + datetime DateTime, + duration UInt32, + pages_count UInt16, + events_count UInt16, + errors_count UInt16, + utm_source Nullable(String), + utm_medium Nullable(String), + utm_campaign Nullable(String), + user_id Nullable(String), + metadata_1 Nullable(String), + metadata_2 Nullable(String), + metadata_3 Nullable(String), + metadata_4 Nullable(String), + metadata_5 Nullable(String), + metadata_6 Nullable(String), + metadata_7 Nullable(String), + metadata_8 Nullable(String), + metadata_9 Nullable(String), + metadata_10 Nullable(String), + issue_types Array(LowCardinality(String)), + referrer Nullable(String), + base_referrer Nullable(String), + _timestamp DateTime DEFAULT now() +) ENGINE = ReplacingMergeTree(_timestamp) + PARTITION BY toYYYYMMDD(datetime) + ORDER BY (project_id, datetime, session_id) + TTL datetime + INTERVAL 3 MONTH + SETTINGS index_granularity = 512; diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_user_favorite_sessions.sql 
b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_user_favorite_sessions.sql new file mode 100644 index 000000000..5a65dc790 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_user_favorite_sessions.sql @@ -0,0 +1,11 @@ +CREATE TABLE IF NOT EXISTS experimental.user_favorite_sessions +( + project_id UInt16, + user_id UInt32, + session_id UInt64, + _timestamp DateTime DEFAULT now(), + sign Int8 +) ENGINE = CollapsingMergeTree(sign) + PARTITION BY toYYYYMM(_timestamp) + ORDER BY (project_id, user_id, session_id) + TTL _timestamp + INTERVAL 3 MONTH; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_user_viewed_sessions.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_user_viewed_sessions.sql new file mode 100644 index 000000000..ed1326399 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_user_viewed_sessions.sql @@ -0,0 +1,10 @@ +CREATE TABLE IF NOT EXISTS experimental.user_viewed_sessions +( + project_id UInt16, + user_id UInt32, + session_id UInt64, + _timestamp DateTime DEFAULT now() +) ENGINE = ReplacingMergeTree(_timestamp) + PARTITION BY toYYYYMM(_timestamp) + ORDER BY (project_id, user_id, session_id) + TTL _timestamp + INTERVAL 3 MONTH; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_user_viewer_errors.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_user_viewer_errors.sql new file mode 100644 index 000000000..5fe21b8bb --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_2_user_viewer_errors.sql @@ -0,0 +1,10 @@ +CREATE TABLE IF NOT EXISTS experimental.user_viewed_errors +( + project_id UInt16, + user_id UInt32, + error_id String, + _timestamp DateTime DEFAULT now() +) ENGINE = ReplacingMergeTree(_timestamp) + PARTITION BY toYYYYMM(_timestamp) + ORDER BY (project_id, user_id, error_id) + TTL _timestamp + INTERVAL 3 MONTH; \ No newline at end of file diff --git 
a/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_3_events_mv.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_3_events_mv.sql new file mode 100644 index 000000000..136aea1c5 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_3_events_mv.sql @@ -0,0 +1,10 @@ +CREATE MATERIALIZED VIEW IF NOT EXISTS experimental.events_l7d_mv + ENGINE = MergeTree + PARTITION BY toYYYYMM(datetime) + ORDER BY (project_id, datetime, session_id) + TTL datetime + INTERVAL 7 DAY + POPULATE +AS +SELECT * +FROM experimental.events +WHERE datetime >= now() - INTERVAL 7 DAY; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_3_resources_mv.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_3_resources_mv.sql new file mode 100644 index 000000000..a5a6fb403 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_3_resources_mv.sql @@ -0,0 +1,10 @@ +CREATE MATERIALIZED VIEW IF NOT EXISTS experimental.resources_l7d_mv + ENGINE = MergeTree + PARTITION BY toYYYYMM(datetime) + ORDER BY (project_id, datetime, session_id) + TTL datetime + INTERVAL 7 DAY + POPULATE +AS +SELECT * +FROM experimental.resources +WHERE datetime >= now() - INTERVAL 7 DAY; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_3_sessions_mv.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_3_sessions_mv.sql new file mode 100644 index 000000000..eea689374 --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/clickhouse/create/exp_3_sessions_mv.sql @@ -0,0 +1,13 @@ +CREATE MATERIALIZED VIEW IF NOT EXISTS experimental.sessions_l7d_mv + ENGINE = ReplacingMergeTree(_timestamp) + PARTITION BY toYYYYMMDD(datetime) + ORDER BY (project_id, datetime, session_id) + TTL datetime + INTERVAL 7 DAY + SETTINGS index_granularity = 512 + POPULATE +AS +SELECT * +FROM experimental.sessions +WHERE datetime >= now() - INTERVAL 7 DAY + AND isNotNull(duration) + AND duration > 0; \ No newline at end of file diff --git 
a/ee/scripts/helm/db/init_dbs/clickhouse/create/negatives_buffer.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/negatives_buffer.sql deleted file mode 100644 index ac67028b3..000000000 --- a/ee/scripts/helm/db/init_dbs/clickhouse/create/negatives_buffer.sql +++ /dev/null @@ -1,215 +0,0 @@ -CREATE TABLE IF NOT EXISTS negatives_buffer -( - sessionid UInt64, - clickevent_hesitationtime Nullable(UInt64), - clickevent_label Nullable(String), - clickevent_messageid Nullable(UInt64), - clickevent_timestamp Nullable(Datetime), - connectioninformation_downlink Nullable(UInt64), - connectioninformation_type Nullable(String), - consolelog_level Nullable(String), - consolelog_value Nullable(String), - cpuissue_duration Nullable(UInt64), - cpuissue_rate Nullable(UInt64), - cpuissue_timestamp Nullable(Datetime), - createdocument Nullable(UInt8), - createelementnode_id Nullable(UInt64), - createelementnode_parentid Nullable(UInt64), - cssdeleterule_index Nullable(UInt64), - cssdeleterule_stylesheetid Nullable(UInt64), - cssinsertrule_index Nullable(UInt64), - cssinsertrule_rule Nullable(String), - cssinsertrule_stylesheetid Nullable(UInt64), - customevent_messageid Nullable(UInt64), - customevent_name Nullable(String), - customevent_payload Nullable(String), - customevent_timestamp Nullable(Datetime), - domdrop_timestamp Nullable(Datetime), - errorevent_message Nullable(String), - errorevent_messageid Nullable(UInt64), - errorevent_name Nullable(String), - errorevent_payload Nullable(String), - errorevent_source Nullable(String), - errorevent_timestamp Nullable(Datetime), - fetch_duration Nullable(UInt64), - fetch_method Nullable(String), - fetch_request Nullable(String), - fetch_status Nullable(UInt64), - fetch_timestamp Nullable(Datetime), - fetch_url Nullable(String), - graphql_operationkind Nullable(String), - graphql_operationname Nullable(String), - graphql_response Nullable(String), - graphql_variables Nullable(String), - graphqlevent_messageid Nullable(UInt64), - 
graphqlevent_name Nullable(String), - graphqlevent_timestamp Nullable(Datetime), - inputevent_label Nullable(String), - inputevent_messageid Nullable(UInt64), - inputevent_timestamp Nullable(Datetime), - inputevent_value Nullable(String), - inputevent_valuemasked Nullable(UInt8), - jsexception_message Nullable(String), - jsexception_name Nullable(String), - jsexception_payload Nullable(String), - longtasks_timestamp Nullable(Datetime), - longtasks_duration Nullable(UInt64), - longtasks_containerid Nullable(String), - longtasks_containersrc Nullable(String), - memoryissue_duration Nullable(UInt64), - memoryissue_rate Nullable(UInt64), - memoryissue_timestamp Nullable(Datetime), - metadata_key Nullable(String), - metadata_value Nullable(String), - mobx_payload Nullable(String), - mobx_type Nullable(String), - mouseclick_id Nullable(UInt64), - mouseclick_hesitationtime Nullable(UInt64), - mouseclick_label Nullable(String), - mousemove_x Nullable(UInt64), - mousemove_y Nullable(UInt64), - movenode_id Nullable(UInt64), - movenode_index Nullable(UInt64), - movenode_parentid Nullable(UInt64), - ngrx_action Nullable(String), - ngrx_duration Nullable(UInt64), - ngrx_state Nullable(String), - pageevent_domcontentloadedeventend Nullable(UInt64), - pageevent_domcontentloadedeventstart Nullable(UInt64), - pageevent_firstcontentfulpaint Nullable(UInt64), - pageevent_firstpaint Nullable(UInt64), - pageevent_loaded Nullable(UInt8), - pageevent_loadeventend Nullable(UInt64), - pageevent_loadeventstart Nullable(UInt64), - pageevent_messageid Nullable(UInt64), - pageevent_referrer Nullable(String), - pageevent_requeststart Nullable(UInt64), - pageevent_responseend Nullable(UInt64), - pageevent_responsestart Nullable(UInt64), - pageevent_speedindex Nullable(UInt64), - pageevent_timestamp Nullable(Datetime), - pageevent_url Nullable(String), - pageloadtiming_domcontentloadedeventend Nullable(UInt64), - pageloadtiming_domcontentloadedeventstart Nullable(UInt64), - 
pageloadtiming_firstcontentfulpaint Nullable(UInt64), - pageloadtiming_firstpaint Nullable(UInt64), - pageloadtiming_loadeventend Nullable(UInt64), - pageloadtiming_loadeventstart Nullable(UInt64), - pageloadtiming_requeststart Nullable(UInt64), - pageloadtiming_responseend Nullable(UInt64), - pageloadtiming_responsestart Nullable(UInt64), - pagerendertiming_speedindex Nullable(UInt64), - pagerendertiming_timetointeractive Nullable(UInt64), - pagerendertiming_visuallycomplete Nullable(UInt64), - performancetrack_frames Nullable(Int64), - performancetrack_ticks Nullable(Int64), - performancetrack_totaljsheapsize Nullable(UInt64), - performancetrack_usedjsheapsize Nullable(UInt64), - performancetrackaggr_avgcpu Nullable(UInt64), - performancetrackaggr_avgfps Nullable(UInt64), - performancetrackaggr_avgtotaljsheapsize Nullable(UInt64), - performancetrackaggr_avgusedjsheapsize Nullable(UInt64), - performancetrackaggr_maxcpu Nullable(UInt64), - performancetrackaggr_maxfps Nullable(UInt64), - performancetrackaggr_maxtotaljsheapsize Nullable(UInt64), - performancetrackaggr_maxusedjsheapsize Nullable(UInt64), - performancetrackaggr_mincpu Nullable(UInt64), - performancetrackaggr_minfps Nullable(UInt64), - performancetrackaggr_mintotaljsheapsize Nullable(UInt64), - performancetrackaggr_minusedjsheapsize Nullable(UInt64), - performancetrackaggr_timestampend Nullable(Datetime), - performancetrackaggr_timestampstart Nullable(Datetime), - profiler_args Nullable(String), - profiler_duration Nullable(UInt64), - profiler_name Nullable(String), - profiler_result Nullable(String), - rawcustomevent_name Nullable(String), - rawcustomevent_payload Nullable(String), - rawerrorevent_message Nullable(String), - rawerrorevent_name Nullable(String), - rawerrorevent_payload Nullable(String), - rawerrorevent_source Nullable(String), - rawerrorevent_timestamp Nullable(Datetime), - redux_action Nullable(String), - redux_duration Nullable(UInt64), - redux_state Nullable(String), - removenode_id 
Nullable(UInt64), - removenodeattribute_id Nullable(UInt64), - removenodeattribute_name Nullable(String), - resourceevent_decodedbodysize Nullable(UInt64), - resourceevent_duration Nullable(UInt64), - resourceevent_encodedbodysize Nullable(UInt64), - resourceevent_headersize Nullable(UInt64), - resourceevent_messageid Nullable(UInt64), - resourceevent_method Nullable(String), - resourceevent_status Nullable(UInt64), - resourceevent_success Nullable(UInt8), - resourceevent_timestamp Nullable(Datetime), - resourceevent_ttfb Nullable(UInt64), - resourceevent_type Nullable(String), - resourceevent_url Nullable(String), - resourcetiming_decodedbodysize Nullable(UInt64), - resourcetiming_duration Nullable(UInt64), - resourcetiming_encodedbodysize Nullable(UInt64), - resourcetiming_headersize Nullable(UInt64), - resourcetiming_initiator Nullable(String), - resourcetiming_timestamp Nullable(Datetime), - resourcetiming_ttfb Nullable(UInt64), - resourcetiming_url Nullable(String), - sessiondisconnect Nullable(UInt8), - sessiondisconnect_timestamp Nullable(Datetime), - sessionend Nullable(UInt8), - sessionend_timestamp Nullable(Datetime), - sessionstart_projectid Nullable(UInt64), - sessionstart_revid Nullable(String), - sessionstart_timestamp Nullable(Datetime), - sessionstart_trackerversion Nullable(String), - sessionstart_useragent Nullable(String), - sessionstart_userbrowser Nullable(String), - sessionstart_userbrowserversion Nullable(String), - sessionstart_usercountry Nullable(String), - sessionstart_userdevice Nullable(String), - sessionstart_userdeviceheapsize Nullable(UInt64), - sessionstart_userdevicememorysize Nullable(UInt64), - sessionstart_userdevicetype Nullable(String), - sessionstart_useros Nullable(String), - sessionstart_userosversion Nullable(String), - sessionstart_useruuid Nullable(String), - setcssdata_data Nullable(UInt64), - setcssdata_id Nullable(UInt64), - setinputchecked_checked Nullable(UInt64), - setinputchecked_id Nullable(UInt64), - 
setinputtarget_id Nullable(UInt64), - setinputtarget_label Nullable(UInt64), - setinputvalue_id Nullable(UInt64), - setinputvalue_mask Nullable(UInt64), - setinputvalue_value Nullable(UInt64), - setnodeattribute_id Nullable(UInt64), - setnodeattribute_name Nullable(UInt64), - setnodeattribute_value Nullable(UInt64), - setnodedata_data Nullable(UInt64), - setnodedata_id Nullable(UInt64), - setnodescroll_id Nullable(UInt64), - setnodescroll_x Nullable(Int64), - setnodescroll_y Nullable(Int64), - setpagelocation_navigationstart Nullable(UInt64), - setpagelocation_referrer Nullable(String), - setpagelocation_url Nullable(String), - setpagevisibility_hidden Nullable(UInt8), - setviewportscroll_x Nullable(Int64), - setviewportscroll_y Nullable(Int64), - setviewportsize_height Nullable(UInt64), - setviewportsize_width Nullable(UInt64), - stateaction_type Nullable(String), - stateactionevent_messageid Nullable(UInt64), - stateactionevent_timestamp Nullable(Datetime), - stateactionevent_type Nullable(String), - timestamp_timestamp Nullable(Datetime), - useranonymousid_id Nullable(String), - userid_id Nullable(String), - vuex_mutation Nullable(String), - vuex_state Nullable(String), - received_at Datetime, - batch_order_number Int64 -) - ENGINE = Buffer(default, negatives, 16, 10, 120, 10000, 1000000, 10000, 100000000); diff --git a/ee/scripts/helm/db/init_dbs/clickhouse/create/negatives_creation_clickhouse.sql b/ee/scripts/helm/db/init_dbs/clickhouse/create/negatives_creation_clickhouse.sql deleted file mode 100644 index 361082d7c..000000000 --- a/ee/scripts/helm/db/init_dbs/clickhouse/create/negatives_creation_clickhouse.sql +++ /dev/null @@ -1,218 +0,0 @@ -CREATE TABLE IF NOT EXISTS negatives -( - sessionid UInt64, - clickevent_hesitationtime Nullable(UInt64), - clickevent_label Nullable(String), - clickevent_messageid Nullable(UInt64), - clickevent_timestamp Nullable(Datetime), - connectioninformation_downlink Nullable(UInt64), - connectioninformation_type 
Nullable(String), - consolelog_level Nullable(String), - consolelog_value Nullable(String), - cpuissue_duration Nullable(UInt64), - cpuissue_rate Nullable(UInt64), - cpuissue_timestamp Nullable(Datetime), - createdocument Nullable(UInt8), - createelementnode_id Nullable(UInt64), - createelementnode_parentid Nullable(UInt64), - cssdeleterule_index Nullable(UInt64), - cssdeleterule_stylesheetid Nullable(UInt64), - cssinsertrule_index Nullable(UInt64), - cssinsertrule_rule Nullable(String), - cssinsertrule_stylesheetid Nullable(UInt64), - customevent_messageid Nullable(UInt64), - customevent_name Nullable(String), - customevent_payload Nullable(String), - customevent_timestamp Nullable(Datetime), - domdrop_timestamp Nullable(Datetime), - errorevent_message Nullable(String), - errorevent_messageid Nullable(UInt64), - errorevent_name Nullable(String), - errorevent_payload Nullable(String), - errorevent_source Nullable(String), - errorevent_timestamp Nullable(Datetime), - fetch_duration Nullable(UInt64), - fetch_method Nullable(String), - fetch_request Nullable(String), - fetch_status Nullable(UInt64), - fetch_timestamp Nullable(Datetime), - fetch_url Nullable(String), - graphql_operationkind Nullable(String), - graphql_operationname Nullable(String), - graphql_response Nullable(String), - graphql_variables Nullable(String), - graphqlevent_messageid Nullable(UInt64), - graphqlevent_name Nullable(String), - graphqlevent_timestamp Nullable(Datetime), - inputevent_label Nullable(String), - inputevent_messageid Nullable(UInt64), - inputevent_timestamp Nullable(Datetime), - inputevent_value Nullable(String), - inputevent_valuemasked Nullable(UInt8), - jsexception_message Nullable(String), - jsexception_name Nullable(String), - jsexception_payload Nullable(String), - longtasks_timestamp Nullable(Datetime), - longtasks_duration Nullable(UInt64), - longtasks_containerid Nullable(String), - longtasks_containersrc Nullable(String), - memoryissue_duration Nullable(UInt64), - 
memoryissue_rate Nullable(UInt64), - memoryissue_timestamp Nullable(Datetime), - metadata_key Nullable(String), - metadata_value Nullable(String), - mobx_payload Nullable(String), - mobx_type Nullable(String), - mouseclick_id Nullable(UInt64), - mouseclick_hesitationtime Nullable(UInt64), - mouseclick_label Nullable(String), - mousemove_x Nullable(UInt64), - mousemove_y Nullable(UInt64), - movenode_id Nullable(UInt64), - movenode_index Nullable(UInt64), - movenode_parentid Nullable(UInt64), - ngrx_action Nullable(String), - ngrx_duration Nullable(UInt64), - ngrx_state Nullable(String), - pageevent_domcontentloadedeventend Nullable(UInt64), - pageevent_domcontentloadedeventstart Nullable(UInt64), - pageevent_firstcontentfulpaint Nullable(UInt64), - pageevent_firstpaint Nullable(UInt64), - pageevent_loaded Nullable(UInt8), - pageevent_loadeventend Nullable(UInt64), - pageevent_loadeventstart Nullable(UInt64), - pageevent_messageid Nullable(UInt64), - pageevent_referrer Nullable(String), - pageevent_requeststart Nullable(UInt64), - pageevent_responseend Nullable(UInt64), - pageevent_responsestart Nullable(UInt64), - pageevent_speedindex Nullable(UInt64), - pageevent_timestamp Nullable(Datetime), - pageevent_url Nullable(String), - pageloadtiming_domcontentloadedeventend Nullable(UInt64), - pageloadtiming_domcontentloadedeventstart Nullable(UInt64), - pageloadtiming_firstcontentfulpaint Nullable(UInt64), - pageloadtiming_firstpaint Nullable(UInt64), - pageloadtiming_loadeventend Nullable(UInt64), - pageloadtiming_loadeventstart Nullable(UInt64), - pageloadtiming_requeststart Nullable(UInt64), - pageloadtiming_responseend Nullable(UInt64), - pageloadtiming_responsestart Nullable(UInt64), - pagerendertiming_speedindex Nullable(UInt64), - pagerendertiming_timetointeractive Nullable(UInt64), - pagerendertiming_visuallycomplete Nullable(UInt64), - performancetrack_frames Nullable(Int64), - performancetrack_ticks Nullable(Int64), - performancetrack_totaljsheapsize 
Nullable(UInt64), - performancetrack_usedjsheapsize Nullable(UInt64), - performancetrackaggr_avgcpu Nullable(UInt64), - performancetrackaggr_avgfps Nullable(UInt64), - performancetrackaggr_avgtotaljsheapsize Nullable(UInt64), - performancetrackaggr_avgusedjsheapsize Nullable(UInt64), - performancetrackaggr_maxcpu Nullable(UInt64), - performancetrackaggr_maxfps Nullable(UInt64), - performancetrackaggr_maxtotaljsheapsize Nullable(UInt64), - performancetrackaggr_maxusedjsheapsize Nullable(UInt64), - performancetrackaggr_mincpu Nullable(UInt64), - performancetrackaggr_minfps Nullable(UInt64), - performancetrackaggr_mintotaljsheapsize Nullable(UInt64), - performancetrackaggr_minusedjsheapsize Nullable(UInt64), - performancetrackaggr_timestampend Nullable(Datetime), - performancetrackaggr_timestampstart Nullable(Datetime), - profiler_args Nullable(String), - profiler_duration Nullable(UInt64), - profiler_name Nullable(String), - profiler_result Nullable(String), - rawcustomevent_name Nullable(String), - rawcustomevent_payload Nullable(String), - rawerrorevent_message Nullable(String), - rawerrorevent_name Nullable(String), - rawerrorevent_payload Nullable(String), - rawerrorevent_source Nullable(String), - rawerrorevent_timestamp Nullable(Datetime), - redux_action Nullable(String), - redux_duration Nullable(UInt64), - redux_state Nullable(String), - removenode_id Nullable(UInt64), - removenodeattribute_id Nullable(UInt64), - removenodeattribute_name Nullable(String), - resourceevent_decodedbodysize Nullable(UInt64), - resourceevent_duration Nullable(UInt64), - resourceevent_encodedbodysize Nullable(UInt64), - resourceevent_headersize Nullable(UInt64), - resourceevent_messageid Nullable(UInt64), - resourceevent_method Nullable(String), - resourceevent_status Nullable(UInt64), - resourceevent_success Nullable(UInt8), - resourceevent_timestamp Nullable(Datetime), - resourceevent_ttfb Nullable(UInt64), - resourceevent_type Nullable(String), - resourceevent_url 
Nullable(String), - resourcetiming_decodedbodysize Nullable(UInt64), - resourcetiming_duration Nullable(UInt64), - resourcetiming_encodedbodysize Nullable(UInt64), - resourcetiming_headersize Nullable(UInt64), - resourcetiming_initiator Nullable(String), - resourcetiming_timestamp Nullable(Datetime), - resourcetiming_ttfb Nullable(UInt64), - resourcetiming_url Nullable(String), - sessiondisconnect Nullable(UInt8), - sessiondisconnect_timestamp Nullable(Datetime), - sessionend Nullable(UInt8), - sessionend_timestamp Nullable(Datetime), - sessionstart_projectid Nullable(UInt64), - sessionstart_revid Nullable(String), - sessionstart_timestamp Nullable(Datetime), - sessionstart_trackerversion Nullable(String), - sessionstart_useragent Nullable(String), - sessionstart_userbrowser Nullable(String), - sessionstart_userbrowserversion Nullable(String), - sessionstart_usercountry Nullable(String), - sessionstart_userdevice Nullable(String), - sessionstart_userdeviceheapsize Nullable(UInt64), - sessionstart_userdevicememorysize Nullable(UInt64), - sessionstart_userdevicetype Nullable(String), - sessionstart_useros Nullable(String), - sessionstart_userosversion Nullable(String), - sessionstart_useruuid Nullable(String), - setcssdata_data Nullable(UInt64), - setcssdata_id Nullable(UInt64), - setinputchecked_checked Nullable(UInt64), - setinputchecked_id Nullable(UInt64), - setinputtarget_id Nullable(UInt64), - setinputtarget_label Nullable(UInt64), - setinputvalue_id Nullable(UInt64), - setinputvalue_mask Nullable(UInt64), - setinputvalue_value Nullable(UInt64), - setnodeattribute_id Nullable(UInt64), - setnodeattribute_name Nullable(UInt64), - setnodeattribute_value Nullable(UInt64), - setnodedata_data Nullable(UInt64), - setnodedata_id Nullable(UInt64), - setnodescroll_id Nullable(UInt64), - setnodescroll_x Nullable(Int64), - setnodescroll_y Nullable(Int64), - setpagelocation_navigationstart Nullable(UInt64), - setpagelocation_referrer Nullable(String), - setpagelocation_url 
Nullable(String), - setpagevisibility_hidden Nullable(UInt8), - setviewportscroll_x Nullable(Int64), - setviewportscroll_y Nullable(Int64), - setviewportsize_height Nullable(UInt64), - setviewportsize_width Nullable(UInt64), - stateaction_type Nullable(String), - stateactionevent_messageid Nullable(UInt64), - stateactionevent_timestamp Nullable(Datetime), - stateactionevent_type Nullable(String), - timestamp_timestamp Nullable(Datetime), - useranonymousid_id Nullable(String), - userid_id Nullable(String), - vuex_mutation Nullable(String), - vuex_state Nullable(String), - received_at Datetime, - batch_order_number Int64 -) - ENGINE = MergeTree() - PARTITION BY toYYYYMM(received_at) - ORDER BY (received_at, batch_order_number) - SETTINGS min_bytes_for_wide_part = 1, use_minimalistic_part_header_in_zookeeper = 1; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/postgresql/1.8.0/1.8.0.sql b/ee/scripts/helm/db/init_dbs/postgresql/1.8.0/1.8.0.sql new file mode 100644 index 000000000..90c4322bc --- /dev/null +++ b/ee/scripts/helm/db/init_dbs/postgresql/1.8.0/1.8.0.sql @@ -0,0 +1,20 @@ +BEGIN; +CREATE OR REPLACE FUNCTION openreplay_version() + RETURNS text AS +$$ +SELECT 'v1.8.0-ee' +$$ LANGUAGE sql IMMUTABLE; + +ALTER TABLE IF EXISTS projects + ADD COLUMN IF NOT EXISTS first_recorded_session_at timestamp without time zone NULL DEFAULT NULL, + ADD COLUMN IF NOT EXISTS sessions_last_check_at timestamp without time zone NULL DEFAULT NULL; + +COMMIT; + +CREATE UNIQUE INDEX CONCURRENTLY IF NOT EXISTS autocomplete_unique_project_id_md5value_type_idx ON autocomplete (project_id, md5(value), type); + +BEGIN; + +DROP INDEX IF EXISTS autocomplete_unique; + +COMMIT; \ No newline at end of file diff --git a/ee/scripts/helm/db/init_dbs/postgresql/init_schema.sql b/ee/scripts/helm/db/init_dbs/postgresql/init_schema.sql index dfae901c5..dd5c380da 100644 --- a/ee/scripts/helm/db/init_dbs/postgresql/init_schema.sql +++ 
b/ee/scripts/helm/db/init_dbs/postgresql/init_schema.sql @@ -7,7 +7,7 @@ CREATE EXTENSION IF NOT EXISTS pgcrypto; CREATE OR REPLACE FUNCTION openreplay_version() RETURNS text AS $$ -SELECT 'v1.7.0-ee' +SELECT 'v1.8.0-ee' $$ LANGUAGE sql IMMUTABLE; @@ -228,32 +228,34 @@ $$ CREATE TABLE IF NOT EXISTS projects ( - project_id integer generated BY DEFAULT AS IDENTITY PRIMARY KEY, - project_key varchar(20) NOT NULL UNIQUE DEFAULT generate_api_key(20), - tenant_id integer NOT NULL REFERENCES tenants (tenant_id) ON DELETE CASCADE, - name text NOT NULL, - active boolean NOT NULL, - sample_rate smallint NOT NULL DEFAULT 100 CHECK (sample_rate >= 0 AND sample_rate <= 100), - created_at timestamp without time zone NOT NULL DEFAULT (now() at time zone 'utc'), - deleted_at timestamp without time zone NULL DEFAULT NULL, - max_session_duration integer NOT NULL DEFAULT 7200000, - metadata_1 text DEFAULT NULL, - metadata_2 text DEFAULT NULL, - metadata_3 text DEFAULT NULL, - metadata_4 text DEFAULT NULL, - metadata_5 text DEFAULT NULL, - metadata_6 text DEFAULT NULL, - metadata_7 text DEFAULT NULL, - metadata_8 text DEFAULT NULL, - metadata_9 text DEFAULT NULL, - metadata_10 text DEFAULT NULL, - save_request_payloads boolean NOT NULL DEFAULT FALSE, - gdpr jsonb NOT NULL DEFAULT'{ + project_id integer generated BY DEFAULT AS IDENTITY PRIMARY KEY, + project_key varchar(20) NOT NULL UNIQUE DEFAULT generate_api_key(20), + tenant_id integer NOT NULL REFERENCES tenants (tenant_id) ON DELETE CASCADE, + name text NOT NULL, + active boolean NOT NULL, + sample_rate smallint NOT NULL DEFAULT 100 CHECK (sample_rate >= 0 AND sample_rate <= 100), + created_at timestamp without time zone NOT NULL DEFAULT (now() at time zone 'utc'), + deleted_at timestamp without time zone NULL DEFAULT NULL, + max_session_duration integer NOT NULL DEFAULT 7200000, + metadata_1 text DEFAULT NULL, + metadata_2 text DEFAULT NULL, + metadata_3 text DEFAULT NULL, + metadata_4 text DEFAULT NULL, + metadata_5 text DEFAULT 
NULL, + metadata_6 text DEFAULT NULL, + metadata_7 text DEFAULT NULL, + metadata_8 text DEFAULT NULL, + metadata_9 text DEFAULT NULL, + metadata_10 text DEFAULT NULL, + save_request_payloads boolean NOT NULL DEFAULT FALSE, + gdpr jsonb NOT NULL DEFAULT'{ "maskEmails": true, "sampleRate": 33, "maskNumbers": false, "defaultInputMode": "plain" - }'::jsonb + }'::jsonb, + first_recorded_session_at timestamp without time zone NULL DEFAULT NULL, + sessions_last_check_at timestamp without time zone NULL DEFAULT NULL ); diff --git a/ee/utilities/.gitignore b/ee/utilities/.gitignore index 0eaed6d80..a11e6be97 100644 --- a/ee/utilities/.gitignore +++ b/ee/utilities/.gitignore @@ -14,3 +14,4 @@ servers/sourcemaps-server.js /utils/HeapSnapshot.js /utils/helper.js /utils/assistHelper.js +.local diff --git a/scripts/helm/db/init_dbs/postgresql/1.8.0/1.8.0.sql b/scripts/helm/db/init_dbs/postgresql/1.8.0/1.8.0.sql new file mode 100644 index 000000000..bfe903ee5 --- /dev/null +++ b/scripts/helm/db/init_dbs/postgresql/1.8.0/1.8.0.sql @@ -0,0 +1,20 @@ +BEGIN; +CREATE OR REPLACE FUNCTION openreplay_version() + RETURNS text AS +$$ +SELECT 'v1.8.0' +$$ LANGUAGE sql IMMUTABLE; + +ALTER TABLE IF EXISTS projects + ADD COLUMN IF NOT EXISTS first_recorded_session_at timestamp without time zone NULL DEFAULT NULL, + ADD COLUMN IF NOT EXISTS sessions_last_check_at timestamp without time zone NULL DEFAULT NULL; + +COMMIT; + +CREATE UNIQUE INDEX CONCURRENTLY IF NOT EXISTS autocomplete_unique_project_id_md5value_type_idx ON autocomplete (project_id, md5(value), type); + +BEGIN; + +DROP INDEX IF EXISTS autocomplete_unique; + +COMMIT; \ No newline at end of file diff --git a/scripts/helm/db/init_dbs/postgresql/init_schema.sql b/scripts/helm/db/init_dbs/postgresql/init_schema.sql index 5f23d7a79..d8fe2afcb 100644 --- a/scripts/helm/db/init_dbs/postgresql/init_schema.sql +++ b/scripts/helm/db/init_dbs/postgresql/init_schema.sql @@ -6,7 +6,7 @@ CREATE SCHEMA IF NOT EXISTS events; CREATE OR REPLACE 
FUNCTION openreplay_version() RETURNS text AS $$ -SELECT 'v1.7.0' +SELECT 'v1.8.0' $$ LANGUAGE sql IMMUTABLE; -- --- accounts.sql --- @@ -173,31 +173,33 @@ $$ CREATE TABLE projects ( - project_id integer generated BY DEFAULT AS IDENTITY PRIMARY KEY, - project_key varchar(20) NOT NULL UNIQUE DEFAULT generate_api_key(20), - name text NOT NULL, - active boolean NOT NULL, - sample_rate smallint NOT NULL DEFAULT 100 CHECK (sample_rate >= 0 AND sample_rate <= 100), - created_at timestamp without time zone NOT NULL DEFAULT (now() at time zone 'utc'), - deleted_at timestamp without time zone NULL DEFAULT NULL, - max_session_duration integer NOT NULL DEFAULT 7200000, - metadata_1 text DEFAULT NULL, - metadata_2 text DEFAULT NULL, - metadata_3 text DEFAULT NULL, - metadata_4 text DEFAULT NULL, - metadata_5 text DEFAULT NULL, - metadata_6 text DEFAULT NULL, - metadata_7 text DEFAULT NULL, - metadata_8 text DEFAULT NULL, - metadata_9 text DEFAULT NULL, - metadata_10 text DEFAULT NULL, - save_request_payloads boolean NOT NULL DEFAULT FALSE, - gdpr jsonb NOT NULL DEFAULT '{ + project_id integer generated BY DEFAULT AS IDENTITY PRIMARY KEY, + project_key varchar(20) NOT NULL UNIQUE DEFAULT generate_api_key(20), + name text NOT NULL, + active boolean NOT NULL, + sample_rate smallint NOT NULL DEFAULT 100 CHECK (sample_rate >= 0 AND sample_rate <= 100), + created_at timestamp without time zone NOT NULL DEFAULT (now() at time zone 'utc'), + deleted_at timestamp without time zone NULL DEFAULT NULL, + max_session_duration integer NOT NULL DEFAULT 7200000, + metadata_1 text DEFAULT NULL, + metadata_2 text DEFAULT NULL, + metadata_3 text DEFAULT NULL, + metadata_4 text DEFAULT NULL, + metadata_5 text DEFAULT NULL, + metadata_6 text DEFAULT NULL, + metadata_7 text DEFAULT NULL, + metadata_8 text DEFAULT NULL, + metadata_9 text DEFAULT NULL, + metadata_10 text DEFAULT NULL, + save_request_payloads boolean NOT NULL DEFAULT FALSE, + gdpr jsonb NOT NULL DEFAULT '{ "maskEmails": true, 
"sampleRate": 33, "maskNumbers": false, "defaultInputMode": "plain" - }'::jsonb -- ?????? + }'::jsonb, + first_recorded_session_at timestamp without time zone NULL DEFAULT NULL, + sessions_last_check_at timestamp without time zone NULL DEFAULT NULL ); CREATE INDEX projects_project_key_idx ON public.projects (project_key); diff --git a/utilities/.dockerignore b/utilities/.dockerignore index b6aaccd33..1ba4ae6a6 100644 --- a/utilities/.dockerignore +++ b/utilities/.dockerignore @@ -4,3 +4,4 @@ **/build.sh **/build_*.sh **/*deploy.sh +.local \ No newline at end of file diff --git a/utilities/.gitignore b/utilities/.gitignore index fffbe974b..4486f0a7f 100644 --- a/utilities/.gitignore +++ b/utilities/.gitignore @@ -2,4 +2,5 @@ node_modules npm-debug.log .cache -test.html \ No newline at end of file +test.html +.local \ No newline at end of file