From 662e5299c12b2299fe679d369aaf400c7348c2b9 Mon Sep 17 00:00:00 2001 From: Kraiem Taha Yassine Date: Mon, 9 Oct 2023 12:52:44 +0200 Subject: [PATCH] Api v1.15.0 (#1506) * feat(chalice): upgraded dependencies * feat(chalice): changed path analysis schema * feat(DB): click coordinate support * feat(chalice): changed path analysis issues schema feat(chalice): upgraded dependencies * fix(chalice): fixed pydantic issue * refactor(chalice): refresh token validator * feat(chalice): role restrictions * feat(chalice): EE path analysis changes * refactor(DB): changed creation queries refactor(DB): changed delte queries feat(DB): support new path analysis payload * feat(chalice): save path analysis card * feat(chalice): restrict access * feat(chalice): restrict access * feat(chalice): EE save new path analysis card * refactor(chalice): path analysis * feat(chalice): path analysis new query * fix(chalice): configurable CH config * fix(chalice): assist autocomplete * refactor(chalice): refactored permissions * refactor(chalice): changed log level * refactor(chalice): upgraded dependencies * refactor(chalice): changed path analysis query * refactor(chalice): changed path analysis query * refactor(chalice): upgraded dependencies refactor(alerts): upgraded dependencies refactor(crons): upgraded dependencies * feat(chalice): path analysis ignore start point * feat(chalice): path analysis in progress --- api/Pipfile | 6 +- api/app.py | 6 +- api/chalicelib/core/product_analytics.py | 343 +++++++++++++++++++- api/entrypoint.sh | 2 +- api/entrypoint_alerts.sh | 2 +- api/requirements-alerts.txt | 6 +- api/requirements.txt | 6 +- api/routers/core.py | 3 +- api/run-dev.sh | 2 +- api/schemas/schemas.py | 7 +- ee/api/Pipfile | 6 +- ee/api/app.py | 6 +- ee/api/chalicelib/core/product_analytics.py | 7 +- ee/api/chalicelib/utils/ch_client.py | 5 +- ee/api/entrypoint.sh | 3 +- ee/api/entrypoint_alerts.sh | 2 +- ee/api/env.default | 1 + ee/api/requirements-alerts.txt | 6 +- ee/api/requirements-crons.txt | 6 +- ee/api/requirements.txt | 6 +- ee/api/schemas/schemas_ee.py | 10 + 21 files changed, 384 insertions(+), 57 deletions(-) diff --git a/api/Pipfile b/api/Pipfile index 9e9118b8c..dbcfa79f9 100644 --- a/api/Pipfile +++ b/api/Pipfile @@ -5,12 +5,12 @@ name = "pypi" [packages] requests = "==2.31.0" -boto3 = "==1.28.55" +boto3 = "==1.28.62" pyjwt = "==2.8.0" -psycopg2-binary = "==2.9.7" +psycopg2-binary = "==2.9.9" elasticsearch = "==8.10.0" jira = "==3.5.2" -fastapi = "==0.103.1" +fastapi = "==0.103.2" python-decouple = "==3.8" apscheduler = "==3.10.4" redis = "==5.0.1" diff --git a/api/app.py b/api/app.py index 88c6c9243..7b16c308b 100644 --- a/api/app.py +++ b/api/app.py @@ -14,7 +14,7 @@ from routers import core, core_dynamic from crons import core_crons, core_dynamic_crons from routers.subs import insights, metrics, v1_api, health -loglevel = config("LOGLEVEL", default=logging.INFO) +loglevel = config("LOGLEVEL", default=logging.WARNING) print(f">Loglevel set to: {loglevel}") logging.basicConfig(level=loglevel) @@ -59,8 +59,8 @@ async def or_middleware(request: Request, call_next): response: StreamingResponse = await call_next(request) if helper.TRACK_TIME: now = int(time.time() * 1000) - now - if now > 500: - logging.info(f"Execution time: {now} ms") + if now > 1500: + logging.warning(f"Execution time: {now} ms for {request.method}:{request.url.path}") return response diff --git a/api/chalicelib/core/product_analytics.py b/api/chalicelib/core/product_analytics.py index ecf370741..79cee6642 100644 --- a/api/chalicelib/core/product_analytics.py +++ b/api/chalicelib/core/product_analytics.py @@ -10,20 +10,6 @@ from chalicelib.utils import sql_helper as sh from time import time -def __transform_journey(rows): - nodes = [] - links = [] - for r in rows: - source = r["source_event"][r["source_event"].index("_") + 1:] - target = r["target_event"][r["target_event"].index("_") + 1:] - if source not in nodes: - nodes.append(source) - if target not in nodes: - nodes.append(target) - links.append({"source": nodes.index(source), "target": nodes.index(target), "value": r["value"]}) - return {"nodes": nodes, "links": sorted(links, key=lambda x: x["value"], reverse=True)} - - def __transform_journey2(rows, reverse_path=False): # nodes should contain duplicates for different steps otherwise the UI crashes nodes = [] @@ -33,14 +19,12 @@ def __transform_journey2(rows, reverse_path=False): source = f"{r['event_number_in_session']}_{r['event_type']}_{r['e_value']}" if source not in nodes: nodes.append(source) - # TODO: remove this after UI supports long values - nodes_values.append({"name": r['e_value'][:10], "eventType": r['event_type']}) + nodes_values.append({"name": r['e_value'], "eventType": r['event_type']}) if r['next_value']: target = f"{r['event_number_in_session'] + 1}_{r['next_type']}_{r['next_value']}" if target not in nodes: nodes.append(target) - # TODO: remove this after UI supports long values - nodes_values.append({"name": r['next_value'][:10], "eventType": r['next_type']}) + nodes_values.append({"name": r['next_value'], "eventType": r['next_type']}) link = {"eventType": r['event_type'], "value": r["sessions_count"], "avgTimeToTarget": r["avg_time_to_target"]} if not reverse_path: @@ -63,6 +47,7 @@ JOURNEY_TYPES = { } +# query: Q2, the result is correct def path_analysis(project_id: int, data: schemas.CardPathAnalysis): sub_events = [] start_points_join = "" @@ -274,6 +259,328 @@ def path_analysis(project_id: int, data: schemas.CardPathAnalysis): WHERE {" AND ".join(sub_events_conditions)}""") events_subquery = "\n UNION ALL \n".join(events_subquery) + if reverse: + path_direction = "DESC" + else: + path_direction = "" + + if len(start_points_conditions) == 0: + start_points_join = """INNER JOIN + (SELECT event_type, e_value + FROM ranked_events + WHERE event_number_in_session = 1 + GROUP BY event_type, e_value + ORDER BY count(1) DESC + LIMIT 2 + ) AS top_start_events USING (event_type, e_value)""" + else: + start_points_conditions = ["(" + " OR ".join(start_points_conditions) + ")"] + start_points_conditions.append("event_number_in_session = 1") + + steps_query = ["""n1 AS (SELECT event_number_in_session, + event_type, + e_value, + next_type, + next_value, + time_to_next, + count(1) AS sessions_count + FROM ranked_events + INNER JOIN start_points USING (session_id) + WHERE event_number_in_session = 1 + GROUP BY event_number_in_session, event_type, e_value, next_type, next_value, time_to_next)"""] + projection_query = ["""(SELECT event_number_in_session, + event_type, + e_value, + next_type, + next_value, + sessions_count, + avg(time_to_next) AS avg_time_to_target + FROM n1 + GROUP BY event_number_in_session, event_type, e_value, next_type, next_value, sessions_count + ORDER BY event_number_in_session, event_type, e_value, next_type, next_value)"""] + for i in range(2, data.density): + steps_query.append(f"""n{i} AS (SELECT * + FROM (SELECT re.event_number_in_session, + re.event_type, + re.e_value, + re.next_type, + re.next_value, + re.time_to_next, + count(1) AS sessions_count + FROM ranked_events AS re + INNER JOIN start_points USING (session_id) + INNER JOIN n{i - 1} ON (n{i - 1}.next_value = re.e_value) + WHERE re.event_number_in_session = {i} + GROUP BY re.event_number_in_session, re.event_type, re.e_value, re.next_type, re.next_value, + re.time_to_next) AS sub_level + ORDER BY sessions_count DESC + LIMIT %(eventThresholdNumberInGroup)s)""") + projection_query.append(f"""(SELECT event_number_in_session, + event_type, + e_value, + next_type, + next_value, + sessions_count, + avg(time_to_next) AS avg_time_to_target + FROM n{i} + GROUP BY event_number_in_session, event_type, e_value, next_type, next_value, sessions_count + ORDER BY event_number_in_session, event_type, e_value, next_type, next_value)""") + + with pg_client.PostgresClient() as cur: + pg_query = f"""\ +WITH sub_sessions AS ( SELECT session_id + FROM public.sessions + WHERE {" AND ".join(sessions_conditions)}), + sub_events AS ({events_subquery}), + ranked_events AS (SELECT * + FROM (SELECT session_id, + event_type, + e_value, + row_number() OVER (PARTITION BY session_id ORDER BY timestamp {path_direction}) AS event_number_in_session, + LEAD(e_value, 1) OVER (PARTITION BY session_id ORDER BY timestamp {path_direction}) AS next_value, + LEAD(event_type, 1) OVER (PARTITION BY session_id ORDER BY timestamp {path_direction}) AS next_type, + abs(LEAD(timestamp, 1) OVER (PARTITION BY session_id ORDER BY timestamp {path_direction}) - + timestamp) AS time_to_next + FROM sub_events) AS full_ranked_events + WHERE event_number_in_session < %(density)s + ), + start_points AS (SELECT session_id + FROM ranked_events {start_points_join} + WHERE {" AND ".join(start_points_conditions)}), + {",".join(steps_query)} +{"UNION ALL".join(projection_query)};""" + params = {"project_id": project_id, "startTimestamp": data.startTimestamp, + "endTimestamp": data.endTimestamp, "density": data.density, + "eventThresholdNumberInGroup": 8 if data.hide_excess else 6, + # TODO: add if data=args is required + # **__get_constraint_values(args), + **extra_values} + query = cur.mogrify(pg_query, params) + _now = time() + + cur.execute(query) + if time() - _now > 0: + print(f">>>>>>>>>PathAnalysis long query ({int(time() - _now)}s)<<<<<<<<<") + print("----------------------") + print(query) + print("----------------------") + rows = cur.fetchall() + + return __transform_journey2(rows=rows, reverse_path=reverse) + + +# the query generated by this function is retuning a wrong result +def path_analysis_deprecated(project_id: int, data: schemas.CardPathAnalysis): + sub_events = [] + start_points_join = "" + start_points_conditions = [] + sessions_conditions = ["start_ts>=%(startTimestamp)s", "start_ts<%(endTimestamp)s", + "project_id=%(project_id)s", "events_count > 1", "duration>0"] + if len(data.metric_value) == 0: + data.metric_value.append(schemas.ProductAnalyticsSelectedEventType.location) + sub_events.append({"table": JOURNEY_TYPES[schemas.ProductAnalyticsSelectedEventType.location]["table"], + "column": JOURNEY_TYPES[schemas.ProductAnalyticsSelectedEventType.location]["column"], + "eventType": schemas.ProductAnalyticsSelectedEventType.location.value}) + else: + for v in data.metric_value: + if JOURNEY_TYPES.get(v): + sub_events.append({"table": JOURNEY_TYPES[v]["table"], + "column": JOURNEY_TYPES[v]["column"], + "eventType": v}) + + extra_values = {} + reverse = data.start_type == "end" + for i, sf in enumerate(data.start_point): + f_k = f"start_point_{i}" + op = sh.get_sql_operator(sf.operator) + is_not = sh.is_negation_operator(sf.operator) + extra_values = {**extra_values, **sh.multi_values(sf.value, value_key=f_k)} + start_points_conditions.append(f"(event_type='{sf.type}' AND " + + sh.multi_conditions(f'e_value {op} %({f_k})s', sf.value, is_not=is_not, + value_key=f_k) + + ")") + + exclusions = {} + for i, ef in enumerate(data.exclude): + if ef.type in data.metric_value: + f_k = f"exclude_{i}" + extra_values = {**extra_values, **sh.multi_values(ef.value, value_key=f_k)} + exclusions[ef.type] = [ + sh.multi_conditions(f'{JOURNEY_TYPES[ef.type]["column"]} != %({f_k})s', ef.value, is_not=True, + value_key=f_k)] + + meta_keys = None + for i, f in enumerate(data.series[0].filter.filters): + op = sh.get_sql_operator(f.operator) + is_any = sh.isAny_opreator(f.operator) + is_not = sh.is_negation_operator(f.operator) + is_undefined = sh.isUndefined_operator(f.operator) + f_k = f"f_value_{i}" + extra_values = {**extra_values, **sh.multi_values(f.value, value_key=f_k)} + + # ---- meta-filters + if f.type == schemas.FilterType.user_browser: + if is_any: + sessions_conditions.append('user_browser IS NOT NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f'user_browser {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif f.type in [schemas.FilterType.user_os]: + if is_any: + sessions_conditions.append('user_os IS NOT NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f'user_os {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif f.type in [schemas.FilterType.user_device]: + if is_any: + sessions_conditions.append('user_device IS NOT NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f'user_device {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif f.type in [schemas.FilterType.user_country]: + if is_any: + sessions_conditions.append('user_country IS NOT NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f'user_country {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif f.type == schemas.FilterType.user_city: + if is_any: + sessions_conditions.append('user_city IS NOT NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f'user_city {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif f.type == schemas.FilterType.user_state: + if is_any: + sessions_conditions.append('user_state IS NOT NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f'user_state {op} %({f_k})s', f.value, is_not=is_not, value_key=f_k)) + + elif f.type in [schemas.FilterType.utm_source]: + if is_any: + sessions_conditions.append('utm_source IS NOT NULL') + elif is_undefined: + sessions_conditions.append('utm_source IS NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f'utm_source {op} %({f_k})s::text', f.value, is_not=is_not, + value_key=f_k)) + + elif f.type in [schemas.FilterType.utm_medium]: + if is_any: + sessions_conditions.append('utm_medium IS NOT NULL') + elif is_undefined: + sessions_conditions.append('utm_medium IS NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f'utm_medium {op} %({f_k})s::text', f.value, is_not=is_not, + value_key=f_k)) + + elif f.type in [schemas.FilterType.utm_campaign]: + if is_any: + sessions_conditions.append('utm_campaign IS NOT NULL') + elif is_undefined: + sessions_conditions.append('utm_campaign IS NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f'utm_campaign {op} %({f_k})s::text', f.value, is_not=is_not, + value_key=f_k)) + + elif f.type == schemas.FilterType.duration: + if len(f.value) > 0 and f.value[0] is not None: + sessions_conditions.append("duration >= %(minDuration)s") + extra_values["minDuration"] = f.value[0] + if len(f.value) > 1 and f.value[1] is not None and int(f.value[1]) > 0: + sessions_conditions.append("duration <= %(maxDuration)s") + extra_values["maxDuration"] = f.value[1] + elif f.type == schemas.FilterType.referrer: + # extra_from += f"INNER JOIN {events.event_type.LOCATION.table} AS p USING(session_id)" + if is_any: + sessions_conditions.append('base_referrer IS NOT NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f"base_referrer {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + elif f.type == schemas.FilterType.metadata: + # get metadata list only if you need it + if meta_keys is None: + meta_keys = metadata.get(project_id=project_id) + meta_keys = {m["key"]: m["index"] for m in meta_keys} + if f.source in meta_keys.keys(): + if is_any: + sessions_conditions.append(f"{metadata.index_to_colname(meta_keys[f.source])} IS NOT NULL") + elif is_undefined: + sessions_conditions.append(f"{metadata.index_to_colname(meta_keys[f.source])} IS NULL") + else: + sessions_conditions.append( + sh.multi_conditions( + f"{metadata.index_to_colname(meta_keys[f.source])} {op} %({f_k})s::text", + f.value, is_not=is_not, value_key=f_k)) + + elif f.type in [schemas.FilterType.user_id, schemas.FilterType.user_id_ios]: + if is_any: + sessions_conditions.append('user_id IS NOT NULL') + elif is_undefined: + sessions_conditions.append('user_id IS NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f"s.user_id {op} %({f_k})s::text", f.value, is_not=is_not, + value_key=f_k)) + + elif f.type in [schemas.FilterType.user_anonymous_id, + schemas.FilterType.user_anonymous_id_ios]: + if is_any: + sessions_conditions.append('user_anonymous_id IS NOT NULL') + elif is_undefined: + sessions_conditions.append('user_anonymous_id IS NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f"user_anonymous_id {op} %({f_k})s::text", f.value, is_not=is_not, + value_key=f_k)) + + elif f.type in [schemas.FilterType.rev_id, schemas.FilterType.rev_id_ios]: + if is_any: + sessions_conditions.append('rev_id IS NOT NULL') + elif is_undefined: + sessions_conditions.append('rev_id IS NULL') + else: + sessions_conditions.append( + sh.multi_conditions(f"rev_id {op} %({f_k})s::text", f.value, is_not=is_not, value_key=f_k)) + + elif f.type == schemas.FilterType.platform: + # op = __ sh.get_sql_operator(f.operator) + sessions_conditions.append( + sh.multi_conditions(f"user_device_type {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + + elif f.type == schemas.FilterType.issue: + if is_any: + sessions_conditions.append("array_length(issue_types, 1) > 0") + else: + sessions_conditions.append( + sh.multi_conditions(f"%({f_k})s {op} ANY (issue_types)", f.value, is_not=is_not, + value_key=f_k)) + + elif f.type == schemas.FilterType.events_count: + sessions_conditions.append( + sh.multi_conditions(f"events_count {op} %({f_k})s", f.value, is_not=is_not, + value_key=f_k)) + events_subquery = [] + for t in sub_events: + sub_events_conditions = ["e.timestamp >= %(startTimestamp)s", + "e.timestamp < %(endTimestamp)s"] + exclusions.get(t["eventType"], []) + events_subquery.append(f"""\ + SELECT session_id, {t["column"]} AS e_value, timestamp, '{t["eventType"]}' AS event_type + FROM {t["table"]} AS e + INNER JOIN sub_sessions USING (session_id) + WHERE {" AND ".join(sub_events_conditions)}""") + events_subquery = "\n UNION ALL \n".join(events_subquery) + if reverse: path_direction = "DESC" else: diff --git a/api/entrypoint.sh b/api/entrypoint.sh index 401046526..df27601ed 100755 --- a/api/entrypoint.sh +++ b/api/entrypoint.sh @@ -1,3 +1,3 @@ #!/bin/sh -uvicorn app:app --host 0.0.0.0 --port $LISTEN_PORT --proxy-headers +uvicorn app:app --host 0.0.0.0 --port $LISTEN_PORT --proxy-headers --log-level ${LOGLEVEL:-warning} diff --git a/api/entrypoint_alerts.sh b/api/entrypoint_alerts.sh index 9ac93dd60..6e1f10178 100755 --- a/api/entrypoint_alerts.sh +++ b/api/entrypoint_alerts.sh @@ -1,3 +1,3 @@ #!/bin/sh export ASSIST_KEY=ignore -uvicorn app:app --host 0.0.0.0 --port 8888 +uvicorn app:app --host 0.0.0.0 --port 8888 --log-level ${LOGLEVEL:-warning} diff --git a/api/requirements-alerts.txt b/api/requirements-alerts.txt index d6e083b88..2acf2c278 100644 --- a/api/requirements-alerts.txt +++ b/api/requirements-alerts.txt @@ -1,15 +1,15 @@ # Keep this version to not have conflicts between requests and boto3 urllib3==1.26.16 requests==2.31.0 -boto3==1.28.55 +boto3==1.28.62 pyjwt==2.8.0 -psycopg2-binary==2.9.7 +psycopg2-binary==2.9.9 elasticsearch==8.10.0 jira==3.5.2 -fastapi==0.103.1 +fastapi==0.103.2 uvicorn[standard]==0.23.2 python-decouple==3.8 pydantic[email]==2.3.0 diff --git a/api/requirements.txt b/api/requirements.txt index 04c9fced5..490d423ea 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -1,15 +1,15 @@ # Keep this version to not have conflicts between requests and boto3 urllib3==1.26.16 requests==2.31.0 -boto3==1.28.55 +boto3==1.28.62 pyjwt==2.8.0 -psycopg2-binary==2.9.7 +psycopg2-binary==2.9.9 elasticsearch==8.10.0 jira==3.5.2 -fastapi==0.103.1 +fastapi==0.103.2 uvicorn[standard]==0.23.2 python-decouple==3.8 pydantic[email]==2.3.0 diff --git a/api/routers/core.py b/api/routers/core.py index 4bbe761f3..5ad302eb4 100644 --- a/api/routers/core.py +++ b/api/routers/core.py @@ -31,7 +31,8 @@ def events_search(projectId: int, q: str, if len(q) == 0: return {"data": []} if live: - return assist.autocomplete(project_id=projectId, q=q, key=type.value if type is not None else None) + return assist.autocomplete(project_id=projectId, q=q, + key=key if key is not None else type.value if type is not None else None) if type in [schemas.FetchFilterType._url]: type = schemas.EventType.request elif type in [schemas.GraphqlFilterType._name]: diff --git a/api/run-dev.sh b/api/run-dev.sh index 76682286d..519afe451 100755 --- a/api/run-dev.sh +++ b/api/run-dev.sh @@ -1,3 +1,3 @@ #!/bin/zsh -uvicorn app:app --reload \ No newline at end of file +uvicorn app:app --reload --log-level ${LOGLEVEL:-warning} \ No newline at end of file diff --git a/api/schemas/schemas.py b/api/schemas/schemas.py index 72ebfc972..d4dcf325c 100644 --- a/api/schemas/schemas.py +++ b/api/schemas/schemas.py @@ -1295,11 +1295,16 @@ class CardPathAnalysis(__CardSchema): return values @model_validator(mode="after") - def __enforce_metric_value(cls, values): + def __clean_start_point_and_enforce_metric_value(cls, values): + start_point = [] metric_value = [] for s in values.start_point: + if len(s.value) == 0: + continue + start_point.append(s) metric_value.append(s.type) + values.start_point = start_point if len(metric_value) > 0: metric_value = remove_duplicate_values(metric_value) values.metric_value = metric_value diff --git a/ee/api/Pipfile b/ee/api/Pipfile index f79abcae7..ace2a3813 100644 --- a/ee/api/Pipfile +++ b/ee/api/Pipfile @@ -6,12 +6,12 @@ name = "pypi" [packages] urllib3 = "==1.26.16" requests = "==2.31.0" -boto3 = "==1.28.55" +boto3 = "==1.28.62" pyjwt = "==2.8.0" -psycopg2-binary = "==2.9.7" +psycopg2-binary = "==2.9.9" elasticsearch = "==8.10.0" jira = "==3.5.2" -fastapi = "==0.103.1" +fastapi = "==0.103.2" gunicorn = "==21.2.0" python-decouple = "==3.8" apscheduler = "==3.10.4" diff --git a/ee/api/app.py b/ee/api/app.py index 6ca8f2cfd..a1041f75d 100644 --- a/ee/api/app.py +++ b/ee/api/app.py @@ -23,7 +23,7 @@ from crons import core_crons, ee_crons, core_dynamic_crons from routers.subs import insights, metrics, v1_api_ee from routers.subs import v1_api, health -loglevel = config("LOGLEVEL", default=logging.INFO) +loglevel = config("LOGLEVEL", default=logging.WARNING) print(f">Loglevel set to: {loglevel}") logging.basicConfig(level=loglevel) @@ -76,8 +76,8 @@ async def or_middleware(request: Request, call_next): response: StreamingResponse = await call_next(request) if helper.TRACK_TIME: now = int(time.time() * 1000) - now - if now > 500: - logging.info(f"Execution time: {now} ms") + if now > 1500: + logging.warning(f"Execution time: {now} ms for {request.method}:{request.url.path}") return response diff --git a/ee/api/chalicelib/core/product_analytics.py b/ee/api/chalicelib/core/product_analytics.py index 86ae82318..cec3f1879 100644 --- a/ee/api/chalicelib/core/product_analytics.py +++ b/ee/api/chalicelib/core/product_analytics.py @@ -34,14 +34,13 @@ def __transform_journey2(rows, reverse_path=False): source = f"{r['event_number_in_session']}_{r['event_type']}_{r['e_value']}" if source not in nodes: nodes.append(source) - # TODO: remove this after UI supports long values - nodes_values.append({"name": r['e_value'][:10], "eventType": r['event_type']}) + nodes_values.append({"name": r['e_value'], "eventType": r['event_type']}) if r['next_value']: target = f"{r['event_number_in_session'] + 1}_{r['next_type']}_{r['next_value']}" if target not in nodes: nodes.append(target) # TODO: remove this after UI supports long values - nodes_values.append({"name": r['next_value'][:10], "eventType": r['next_type']}) + nodes_values.append({"name": r['next_value'], "eventType": r['next_type']}) link = {"eventType": r['event_type'], "value": r["sessions_count"], "avgTimeToTarget": r["avg_time_to_target"]} if not reverse_path: @@ -363,7 +362,7 @@ ORDER BY event_number_in_session, e_value, next_value;""" _now = time() rows = ch.execute(query=ch_query, params=params) - if time() - _now > 3: + if time() - _now > 0: print(f">>>>>>>>>PathAnalysis long query EE ({int(time() - _now)}s)<<<<<<<<<") print("----------------------") print(print(ch.format(ch_query, params))) diff --git a/ee/api/chalicelib/utils/ch_client.py b/ee/api/chalicelib/utils/ch_client.py index c986fe581..cbd27d235 100644 --- a/ee/api/chalicelib/utils/ch_client.py +++ b/ee/api/chalicelib/utils/ch_client.py @@ -19,6 +19,9 @@ class ClickHouseClient: __client = None def __init__(self, database=None): + extra_args = {} + if config("CH_COMPRESSION", cast=bool, default=True): + extra_args["compression"] = "lz4" self.__client = clickhouse_driver.Client(host=config("ch_host"), database=database if database else config("ch_database", default="default"), @@ -26,7 +29,7 @@ class ClickHouseClient: password=config("ch_password", default=""), port=config("ch_port", cast=int), settings=settings, - compression='lz4') \ + **extra_args) \ if self.__client is None else self.__client def __enter__(self): diff --git a/ee/api/entrypoint.sh b/ee/api/entrypoint.sh index a95dc620a..faedf5ecf 100755 --- a/ee/api/entrypoint.sh +++ b/ee/api/entrypoint.sh @@ -4,4 +4,5 @@ source /tmp/.env.override #uvicorn app:app --host 0.0.0.0 --port $LISTEN_PORT --proxy-headers NB_WORKERS="${NB_WORKERS:=4}" -gunicorn app:app --workers $NB_WORKERS --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$LISTEN_PORT \ No newline at end of file +gunicorn app:app --workers $NB_WORKERS --worker-class uvicorn.workers.UvicornWorker \ + --bind 0.0.0.0:$LISTEN_PORT --log-level ${LOGLEVEL:-warning} \ No newline at end of file diff --git a/ee/api/entrypoint_alerts.sh b/ee/api/entrypoint_alerts.sh index 410015142..d3c80e252 100755 --- a/ee/api/entrypoint_alerts.sh +++ b/ee/api/entrypoint_alerts.sh @@ -2,4 +2,4 @@ export ASSIST_KEY=ignore sh env_vars.sh source /tmp/.env.override -uvicorn app:app --host 0.0.0.0 --port 8888 +uvicorn app:app --host 0.0.0.0 --port 8888 --log-level ${LOGLEVEL:-warning} diff --git a/ee/api/env.default b/ee/api/env.default index d912d2248..674f549e7 100644 --- a/ee/api/env.default +++ b/ee/api/env.default @@ -78,3 +78,4 @@ KAFKA_SERVERS=kafka.db.svc.cluster.local:9092 KAFKA_USE_SSL=false SCH_DELETE_DAYS=30 TZ=UTC +CH_COMPRESSION=true \ No newline at end of file diff --git a/ee/api/requirements-alerts.txt b/ee/api/requirements-alerts.txt index f27c6913a..0dece32c7 100644 --- a/ee/api/requirements-alerts.txt +++ b/ee/api/requirements-alerts.txt @@ -1,15 +1,15 @@ # Keep this version to not have conflicts between requests and boto3 urllib3==1.26.16 requests==2.31.0 -boto3==1.28.55 +boto3==1.28.62 pyjwt==2.8.0 -psycopg2-binary==2.9.7 +psycopg2-binary==2.9.9 elasticsearch==8.10.0 jira==3.5.2 -fastapi==0.103.1 +fastapi==0.103.2 uvicorn[standard]==0.23.2 python-decouple==3.8 pydantic[email]==2.3.0 diff --git a/ee/api/requirements-crons.txt b/ee/api/requirements-crons.txt index b4cd24d9a..17c7193c3 100644 --- a/ee/api/requirements-crons.txt +++ b/ee/api/requirements-crons.txt @@ -1,15 +1,15 @@ # Keep this version to not have conflicts between requests and boto3 urllib3==1.26.16 requests==2.31.0 -boto3==1.28.55 +boto3==1.28.62 pyjwt==2.8.0 -psycopg2-binary==2.9.7 +psycopg2-binary==2.9.9 elasticsearch==8.10.0 jira==3.5.2 -fastapi==0.103.1 +fastapi==0.103.2 python-decouple==3.8 pydantic[email]==2.3.0 apscheduler==3.10.4 diff --git a/ee/api/requirements.txt b/ee/api/requirements.txt index 5ac8f64b7..c283303e2 100644 --- a/ee/api/requirements.txt +++ b/ee/api/requirements.txt @@ -1,15 +1,15 @@ # Keep this version to not have conflicts between requests and boto3 urllib3==1.26.16 requests==2.31.0 -boto3==1.28.55 +boto3==1.28.62 pyjwt==2.8.0 -psycopg2-binary==2.9.7 +psycopg2-binary==2.9.9 elasticsearch==8.10.0 jira==3.5.2 -fastapi==0.103.1 +fastapi==0.103.2 uvicorn[standard]==0.23.2 gunicorn==21.2.0 python-decouple==3.8 diff --git a/ee/api/schemas/schemas_ee.py b/ee/api/schemas/schemas_ee.py index 61d872535..52a09f72a 100644 --- a/ee/api/schemas/schemas_ee.py +++ b/ee/api/schemas/schemas_ee.py @@ -30,6 +30,16 @@ class CurrentContext(schemas.CurrentContext): permissions: List[Union[Permissions, ServicePermissions]] = Field(...) service_account: bool = Field(default=False) + @model_validator(mode="before") + def remove_unsupported_perms(cls, values): + if values.get("permissions") is not None: + perms = [] + for p in values["permissions"]: + if Permissions.has_value(p): + perms.append(p) + values["permissions"] = perms + return values + class RolePayloadSchema(BaseModel): name: str = Field(..., min_length=1, max_length=40)