From 2f693cd490b291a254d31bb873a6bd6b47fc3c54 Mon Sep 17 00:00:00 2001 From: Kraiem Taha Yassine Date: Thu, 5 Dec 2024 17:43:52 +0100 Subject: [PATCH] Dev (#2819) * fix(chalice): fixed Math-operators validation refactor(chalice): search for sessions that have events for heatmaps * refactor(chalice): search for sessions that have at least 1 location event for heatmaps * fix(chalice): fixed Math-operators validation refactor(chalice): search for sessions that have events for heatmaps * refactor(chalice): search for sessions that have at least 1 location event for heatmaps * feat(chalice): autocomplete return top 10 with stats * fix(chalice): fixed autocomplete top 10 meta-filters * refactor(chalice): removed sessions insights refactor(DB): removed sessions insights * refactor(chalice): upgraded dependencies refactor(crons): upgraded dependencies refactor(alerts): upgraded dependencies feat(chalice): moved CH to FOSS feat(chalice): use clickhouse-connect feat(chalice): use CH connexion pool feat(scripts): defined ch-data-port --- api/Pipfile | 9 +- api/app.py | 10 +- api/chalicelib/core/__init__.py | 10 + api/chalicelib/core/custom_metrics.py | 2 - .../chalicelib/core/metrics_ch.py | 0 api/chalicelib/core/product_anaytics2.py | 14 + api/chalicelib/utils/__init__.py | 6 + {ee/api => api}/chalicelib/utils/ch_client.py | 30 +- api/chalicelib/utils/ch_client_exp.py | 179 +++++ api/chalicelib/utils/exp_ch_helper.py | 68 ++ api/env.default | 10 +- api/requirements-alerts.txt | 8 +- api/requirements.txt | 8 +- api/routers/subs/product_anaytics.py | 17 + api/schemas/schemas.py | 28 +- ee/api/.gitignore | 7 + ee/api/Pipfile | 15 +- ee/api/app.py | 9 +- ee/api/chalicelib/core/__init__.py | 4 + ee/api/chalicelib/core/custom_metrics.py | 693 ------------------ ee/api/chalicelib/core/custom_metrics_ee.py | 234 ++++++ ee/api/chalicelib/core/metrics_ch.py | 614 ++++++++++++++++ ee/api/clean-dev.sh | 9 +- ee/api/env.default | 4 +- ee/api/requirements-alerts.txt | 8 +- ee/api/requirements-crons.txt | 8 +- ee/api/requirements.txt | 8 +- ee/api/schemas/schemas_ee.py | 26 - .../db/init_dbs/postgresql/1.22.0/1.22.0.sql | 4 + scripts/docker-compose/alerts.env | 1 + scripts/docker-compose/chalice.env | 1 + .../charts/alerts/templates/deployment.yaml | 2 + .../charts/chalice/templates/deployment.yaml | 2 + .../openreplay/charts/utilities/values.yaml | 1 + .../helmcharts/openreplay/templates/job.yaml | 4 + .../db/init_dbs/postgresql/1.22.0/1.22.0.sql | 4 + 36 files changed, 1260 insertions(+), 797 deletions(-) rename ee/api/chalicelib/core/metrics.py => api/chalicelib/core/metrics_ch.py (100%) create mode 100644 api/chalicelib/core/product_anaytics2.py rename {ee/api => api}/chalicelib/utils/ch_client.py (75%) create mode 100644 api/chalicelib/utils/ch_client_exp.py create mode 100644 api/chalicelib/utils/exp_ch_helper.py create mode 100644 api/routers/subs/product_anaytics.py delete mode 100644 ee/api/chalicelib/core/custom_metrics.py create mode 100644 ee/api/chalicelib/core/custom_metrics_ee.py create mode 100644 ee/api/chalicelib/core/metrics_ch.py diff --git a/api/Pipfile b/api/Pipfile index 1d1d60aa3..d5c52995c 100644 --- a/api/Pipfile +++ b/api/Pipfile @@ -4,21 +4,22 @@ verify_ssl = true name = "pypi" [packages] +sqlparse = "==0.5.2" urllib3 = "==1.26.16" requests = "==2.32.3" -boto3 = "==1.35.72" +boto3 = "==1.35.76" pyjwt = "==2.10.1" psycopg2-binary = "==2.9.10" psycopg = {extras = ["binary", "pool"], version = "==3.2.3"} clickhouse-driver = {extras = ["lz4"], version = "==0.2.9"} -clickhouse-connect = "==0.8.8" +clickhouse-connect = "==0.8.9" elasticsearch = "==8.16.0" jira = "==3.8.0" cachetools = "==5.5.0" -fastapi = "==0.115.5" +fastapi = "==0.115.6" uvicorn = {extras = ["standard"], version = "==0.32.1"} python-decouple = "==3.8" -pydantic = {extras = ["email"], version = "==2.10.2"} +pydantic = {extras = ["email"], version = "==2.10.3"} apscheduler = "==3.11.0" redis = "==5.2.0" diff --git a/api/app.py b/api/app.py index 41775206e..d7e5215a5 100644 --- a/api/app.py +++ b/api/app.py @@ -13,17 +13,16 @@ from psycopg.rows import dict_row from starlette.responses import StreamingResponse from chalicelib.utils import helper -from chalicelib.utils import pg_client +from chalicelib.utils import pg_client, ch_client from crons import core_crons, core_dynamic_crons from routers import core, core_dynamic -from routers.subs import insights, metrics, v1_api, health, usability_tests, spot +from routers.subs import insights, metrics, v1_api, health, usability_tests, spot, product_anaytics loglevel = config("LOGLEVEL", default=logging.WARNING) print(f">Loglevel set to: {loglevel}") logging.basicConfig(level=loglevel) - class ORPYAsyncConnection(AsyncConnection): def __init__(self, *args, **kwargs): @@ -39,6 +38,7 @@ async def lifespan(app: FastAPI): app.schedule = AsyncIOScheduler() await pg_client.init() + await ch_client.init() app.schedule.start() for job in core_crons.cron_jobs + core_dynamic_crons.cron_jobs: @@ -128,3 +128,7 @@ app.include_router(usability_tests.app_apikey) app.include_router(spot.public_app) app.include_router(spot.app) app.include_router(spot.app_apikey) + +app.include_router(product_anaytics.public_app) +app.include_router(product_anaytics.app) +app.include_router(product_anaytics.app_apikey) diff --git a/api/chalicelib/core/__init__.py b/api/chalicelib/core/__init__.py index e69de29bb..ee17d1efa 100644 --- a/api/chalicelib/core/__init__.py +++ b/api/chalicelib/core/__init__.py @@ -0,0 +1,10 @@ +from decouple import config +import logging + +logging.basicConfig(level=config("LOGLEVEL", default=logging.INFO)) + +if config("EXP_CH_LAYER", cast=bool, default=True): + from . import metrics_ch as metrics + from . import metrics as metrics_legacy +else: + from . import metrics diff --git a/api/chalicelib/core/custom_metrics.py b/api/chalicelib/core/custom_metrics.py index 6a9e0ab10..c3ac00b69 100644 --- a/api/chalicelib/core/custom_metrics.py +++ b/api/chalicelib/core/custom_metrics.py @@ -173,7 +173,6 @@ def get_chart(project: schemas.ProjectContext, data: schemas.CardSchema, user_id schemas.MetricType.TABLE: __get_table_chart, schemas.MetricType.HEAT_MAP: __get_heat_map_chart, schemas.MetricType.FUNNEL: __get_funnel_chart, - schemas.MetricType.INSIGHTS: not_supported, schemas.MetricType.PATH_ANALYSIS: __get_path_analysis_chart } return supported.get(data.metric_type, not_supported)(project=project, data=data, user_id=user_id) @@ -220,7 +219,6 @@ def get_issues(project: schemas.ProjectContext, user_id: int, data: schemas.Card schemas.MetricType.TIMESERIES: not_supported, schemas.MetricType.TABLE: not_supported, schemas.MetricType.HEAT_MAP: not_supported, - schemas.MetricType.INSIGHTS: not_supported, schemas.MetricType.PATH_ANALYSIS: not_supported, } return supported.get(data.metric_type, not_supported)() diff --git a/ee/api/chalicelib/core/metrics.py b/api/chalicelib/core/metrics_ch.py similarity index 100% rename from ee/api/chalicelib/core/metrics.py rename to api/chalicelib/core/metrics_ch.py diff --git a/api/chalicelib/core/product_anaytics2.py b/api/chalicelib/core/product_anaytics2.py new file mode 100644 index 000000000..9e32e088d --- /dev/null +++ b/api/chalicelib/core/product_anaytics2.py @@ -0,0 +1,14 @@ +from chalicelib.utils.ch_client import ClickHouseClient + + +def search_events(project_id: int, data: dict): + with ClickHouseClient() as ch_client: + r = ch_client.format( + """SELECT * + FROM taha.events + WHERE project_id=%(project_id)s + ORDER BY created_at;""", + params={"project_id": project_id}) + x = ch_client.execute(r) + + return x diff --git a/api/chalicelib/utils/__init__.py b/api/chalicelib/utils/__init__.py index df64e4775..54e0b4c65 100644 --- a/api/chalicelib/utils/__init__.py +++ b/api/chalicelib/utils/__init__.py @@ -11,3 +11,9 @@ if smtp.has_smtp(): logger.info("valid SMTP configuration found") else: logger.info("no SMTP configuration found or SMTP validation failed") + +if config("EXP_CH_DRIVER", cast=bool, default=True): + logging.info(">>> Using new CH driver") + from . import ch_client_exp as ch_client +else: + from . import ch_client diff --git a/ee/api/chalicelib/utils/ch_client.py b/api/chalicelib/utils/ch_client.py similarity index 75% rename from ee/api/chalicelib/utils/ch_client.py rename to api/chalicelib/utils/ch_client.py index b7d19b4f9..b0b0259c5 100644 --- a/ee/api/chalicelib/utils/ch_client.py +++ b/api/chalicelib/utils/ch_client.py @@ -3,15 +3,15 @@ import logging import clickhouse_driver from decouple import config -logging.basicConfig(level=config("LOGLEVEL", default=logging.INFO)) +logger = logging.getLogger(__name__) settings = {} if config('ch_timeout', cast=int, default=-1) > 0: - logging.info(f"CH-max_execution_time set to {config('ch_timeout')}s") + logger.info(f"CH-max_execution_time set to {config('ch_timeout')}s") settings = {**settings, "max_execution_time": config('ch_timeout', cast=int)} if config('ch_receive_timeout', cast=int, default=-1) > 0: - logging.info(f"CH-receive_timeout set to {config('ch_receive_timeout')}s") + logger.info(f"CH-receive_timeout set to {config('ch_receive_timeout')}s") settings = {**settings, "receive_timeout": config('ch_receive_timeout', cast=int)} @@ -41,14 +41,14 @@ class ClickHouseClient: keys = tuple(x for x, y in results[1]) return [dict(zip(keys, i)) for i in results[0]] except Exception as err: - logging.error("--------- CH EXCEPTION -----------") - logging.error(err) - logging.error("--------- CH QUERY EXCEPTION -----------") - logging.error(self.format(query=query, params=params) - .replace('\n', '\\n') - .replace(' ', ' ') - .replace(' ', ' ')) - logging.error("--------------------") + logger.error("--------- CH EXCEPTION -----------") + logger.error(err) + logger.error("--------- CH QUERY EXCEPTION -----------") + logger.error(self.format(query=query, params=params) + .replace('\n', '\\n') + .replace(' ', ' ') + .replace(' ', ' ')) + logger.error("--------------------") raise err def insert(self, query, params=None, **args): @@ -64,3 +64,11 @@ class ClickHouseClient: def __exit__(self, *args): pass + + +async def init(): + logger.info(f">CH_POOL:not defined") + + +async def terminate(): + pass diff --git a/api/chalicelib/utils/ch_client_exp.py b/api/chalicelib/utils/ch_client_exp.py new file mode 100644 index 000000000..af4f20b8b --- /dev/null +++ b/api/chalicelib/utils/ch_client_exp.py @@ -0,0 +1,179 @@ +import logging +import threading +import time +from functools import wraps +from queue import Queue + +import clickhouse_connect +from clickhouse_connect.driver.query import QueryContext +from decouple import config + +logger = logging.getLogger(__name__) +settings = {} +if config('ch_timeout', cast=int, default=-1) > 0: + logging.info(f"CH-max_execution_time set to {config('ch_timeout')}s") + settings = {**settings, "max_execution_time": config('ch_timeout', cast=int)} + +if config('ch_receive_timeout', cast=int, default=-1) > 0: + logging.info(f"CH-receive_timeout set to {config('ch_receive_timeout')}s") + settings = {**settings, "receive_timeout": config('ch_receive_timeout', cast=int)} + +extra_args = {} +if config("CH_COMPRESSION", cast=bool, default=True): + extra_args["compression"] = "lz4" + + +def transform_result(original_function): + @wraps(original_function) + def wrapper(*args, **kwargs): + result = original_function(*args, **kwargs) + if isinstance(result, clickhouse_connect.driver.query.QueryResult): + column_names = result.column_names + result = result.result_rows + result = [dict(zip(column_names, row)) for row in result] + + return result + + return wrapper + + +class ClickHouseConnectionPool: + def __init__(self, min_size, max_size, settings): + self.min_size = min_size + self.max_size = max_size + self.pool = Queue() + self.lock = threading.Lock() + self.total_connections = 0 + self.settings = settings + + # Initialize the pool with min_size connections + for _ in range(self.min_size): + client = clickhouse_connect.get_client(host=config("ch_host"), + database=config("ch_database", default="default"), + user=config("ch_user", default="default"), + password=config("ch_password", default=""), + port=config("ch_port_http", cast=int), + settings=settings, + **extra_args) + self.pool.put(client) + self.total_connections += 1 + + def get_connection(self): + try: + # Try to get a connection without blocking + client = self.pool.get_nowait() + return client + except Empty: + with self.lock: + if self.total_connections < self.max_size: + client = clickhouse_connect.get_client( + host=config("ch_host"), + database=config("ch_database", default="default"), + user=config("ch_user", default="default"), + password=config("ch_password", default=""), + port=config("ch_port_http", cast=int), + settings=settings, + **extra_args + ) + self.total_connections += 1 + return client + # If max_size reached, wait until a connection is available + client = self.pool.get() + return client + + def release_connection(self, client): + self.pool.put(client) + + def close_all(self): + with self.lock: + while not self.pool.empty(): + client = self.pool.get() + client.close() + self.total_connections = 0 + + +CH_pool: ClickHouseConnectionPool = None + +RETRY_MAX = config("CH_RETRY_MAX", cast=int, default=50) +RETRY_INTERVAL = config("CH_RETRY_INTERVAL", cast=int, default=2) +RETRY = 0 + + +def make_pool(): + if not config('CH_POOL', cast=bool, default=True): + return + global CH_pool + global RETRY + if CH_pool is not None: + try: + CH_pool.close_all() + except Exception as error: + logger.error("Error while closing all connexions to CH", error) + try: + CH_pool = ClickHouseConnectionPool(min_size=config("PG_MINCONN", cast=int, default=4), + max_size=config("PG_MAXCONN", cast=int, default=8), + settings=settings) + if CH_pool is not None: + logger.info("Connection pool created successfully for CH") + except Exception as error: + logger.error("Error while connecting to CH", error) + if RETRY < RETRY_MAX: + RETRY += 1 + logger.info(f"waiting for {RETRY_INTERVAL}s before retry n°{RETRY}") + time.sleep(RETRY_INTERVAL) + make_pool() + else: + raise error + + +class ClickHouseClient: + __client = None + + def __init__(self, database=None): + if self.__client is None: + if config('CH_POOL', cast=bool, default=True): + self.__client = CH_pool.get_connection() + else: + self.__client = clickhouse_connect.get_client(host=config("ch_host"), + database=database if database else config("ch_database", + default="default"), + user=config("ch_user", default="default"), + password=config("ch_password", default=""), + port=config("ch_port_http", cast=int), + settings=settings, + **extra_args) + self.__client.execute = transform_result(self.__client.query) + self.__client.format = self.format + + def __enter__(self): + return self.__client + + def format(self, query, *, params=None): + if params is None: + return query + return query % { + key: f"'{value}'" if isinstance(value, str) else value + for key, value in params.items() + } + + def __exit__(self, *args): + if config('CH_POOL', cast=bool, default=True): + CH_pool.release_connection(self.__client) + else: + self.__client.close() + + +async def init(): + logger.info(f">CH_POOL:{config('CH_POOL', default=None)}") + if config('CH_POOL', cast=bool, default=True): + make_pool() + + +async def terminate(): + global CH_pool + if CH_pool is not None: + try: + CH_pool.close_all() + logger.info("Closed all connexions to CH") + except Exception as error: + logger.error("Error while closing all connexions to CH", error) diff --git a/api/chalicelib/utils/exp_ch_helper.py b/api/chalicelib/utils/exp_ch_helper.py new file mode 100644 index 000000000..a15672614 --- /dev/null +++ b/api/chalicelib/utils/exp_ch_helper.py @@ -0,0 +1,68 @@ +from typing import Union + +import schemas +import logging + +logger = logging.getLogger(__name__) + + +def get_main_events_table(timestamp=0, platform="web"): + if platform == "web": + return "experimental.events" + else: + return "experimental.ios_events" + + +def get_main_sessions_table(timestamp=0): + return "experimental.sessions" + + +def get_user_favorite_sessions_table(timestamp=0): + return "experimental.user_favorite_sessions" + + +def get_user_viewed_sessions_table(timestamp=0): + return "experimental.user_viewed_sessions" + + +def get_user_viewed_errors_table(timestamp=0): + return "experimental.user_viewed_errors" + + +def get_main_js_errors_sessions_table(timestamp=0): + return get_main_events_table(timestamp=timestamp) + + +def get_event_type(event_type: Union[schemas.EventType, schemas.PerformanceEventType], platform="web"): + defs = { + schemas.EventType.CLICK: "CLICK", + schemas.EventType.INPUT: "INPUT", + schemas.EventType.LOCATION: "LOCATION", + schemas.PerformanceEventType.LOCATION_DOM_COMPLETE: "LOCATION", + schemas.PerformanceEventType.LOCATION_LARGEST_CONTENTFUL_PAINT_TIME: "LOCATION", + schemas.PerformanceEventType.LOCATION_TTFB: "LOCATION", + schemas.EventType.CUSTOM: "CUSTOM", + schemas.EventType.REQUEST: "REQUEST", + schemas.EventType.REQUEST_DETAILS: "REQUEST", + schemas.PerformanceEventType.FETCH_FAILED: "REQUEST", + schemas.GraphqlFilterType.GRAPHQL_NAME: "GRAPHQL", + schemas.EventType.STATE_ACTION: "STATEACTION", + schemas.EventType.ERROR: "ERROR", + schemas.PerformanceEventType.LOCATION_AVG_CPU_LOAD: 'PERFORMANCE', + schemas.PerformanceEventType.LOCATION_AVG_MEMORY_USAGE: 'PERFORMANCE', + schemas.FetchFilterType.FETCH_URL: 'REQUEST' + } + defs_mobile = { + schemas.EventType.CLICK_MOBILE: "TAP", + schemas.EventType.INPUT_MOBILE: "INPUT", + schemas.EventType.CUSTOM_MOBILE: "CUSTOM", + schemas.EventType.REQUEST_MOBILE: "REQUEST", + schemas.EventType.ERROR_MOBILE: "CRASH", + schemas.EventType.VIEW_MOBILE: "VIEW", + schemas.EventType.SWIPE_MOBILE: "SWIPE" + } + if platform != "web" and event_type in defs_mobile: + return defs_mobile.get(event_type) + if event_type not in defs: + raise Exception(f"unsupported EventType:{event_type}") + return defs.get(event_type) diff --git a/api/env.default b/api/env.default index 8e80f2ea2..f6ca1872b 100644 --- a/api/env.default +++ b/api/env.default @@ -8,6 +8,12 @@ assistList=/sockets-list CANVAS_PATTERN=%(sessionId)s/%(recordingId)s.tar.zst captcha_key= captcha_server= +CH_COMPRESSION=true +ch_host= +ch_port= +ch_port_http= +ch_receive_timeout=10 +ch_timeout=30 change_password_link=/reset-password?invitation=%s&&pass=%s DEVTOOLS_MOB_PATTERN=%(sessionId)s/devtools.mob EFS_DEVTOOLS_MOB_PATTERN=%(sessionId)sdevtools @@ -63,4 +69,6 @@ SITE_URL= sourcemaps_bucket=sourcemaps sourcemaps_reader=http://sourcemapreader-openreplay.app.svc.cluster.local:9000/sourcemaps/{}/sourcemaps STAGE=default-foss -TZ=UTC \ No newline at end of file +TZ=UTC +EXP_CH_DRIVER=true +EXP_CH_LAYER=true \ No newline at end of file diff --git a/api/requirements-alerts.txt b/api/requirements-alerts.txt index 15e5e7e84..4f356e46c 100644 --- a/api/requirements-alerts.txt +++ b/api/requirements-alerts.txt @@ -1,18 +1,20 @@ # Keep this version to not have conflicts between requests and boto3 urllib3==1.26.16 requests==2.32.3 -boto3==1.35.72 +boto3==1.35.76 pyjwt==2.10.1 psycopg2-binary==2.9.10 psycopg[pool,binary]==3.2.3 +clickhouse-driver[lz4]==0.2.9 +clickhouse-connect==0.8.9 elasticsearch==8.16.0 jira==3.8.0 cachetools==5.5.0 -fastapi==0.115.5 +fastapi==0.115.6 uvicorn[standard]==0.32.1 python-decouple==3.8 -pydantic[email]==2.10.2 +pydantic[email]==2.10.3 apscheduler==3.11.0 diff --git a/api/requirements.txt b/api/requirements.txt index 74f9f77bf..943a36e40 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -1,22 +1,22 @@ # Keep this version to not have conflicts between requests and boto3 urllib3==1.26.16 requests==2.32.3 -boto3==1.35.72 +boto3==1.35.76 pyjwt==2.10.1 psycopg2-binary==2.9.10 psycopg[pool,binary]==3.2.3 clickhouse-driver[lz4]==0.2.9 -clickhouse-connect==0.8.8 +clickhouse-connect==0.8.9 elasticsearch==8.16.0 jira==3.8.0 cachetools==5.5.0 -fastapi==0.115.5 +fastapi==0.115.6 uvicorn[standard]==0.32.1 python-decouple==3.8 -pydantic[email]==2.10.2 +pydantic[email]==2.10.3 apscheduler==3.11.0 redis==5.2.0 diff --git a/api/routers/subs/product_anaytics.py b/api/routers/subs/product_anaytics.py new file mode 100644 index 000000000..5f5de83c1 --- /dev/null +++ b/api/routers/subs/product_anaytics.py @@ -0,0 +1,17 @@ +from typing import Union + +import schemas +from chalicelib.core import product_anaytics2 +from fastapi import Body, Depends +from or_dependencies import OR_context +from routers.base import get_routers + + +public_app, app, app_apikey = get_routers() + + +@app.post('/{projectId}/events/search', tags=["dashboard"]) +def search_events(projectId: int, + # data: schemas.CreateDashboardSchema = Body(...), + context: schemas.CurrentContext = Depends(OR_context)): + return product_anaytics2.search_events(project_id=projectId, data={}) diff --git a/api/schemas/schemas.py b/api/schemas/schemas.py index ac636ec95..25d60fcf4 100644 --- a/api/schemas/schemas.py +++ b/api/schemas/schemas.py @@ -921,7 +921,6 @@ class MetricType(str, Enum): RETENTION = "retention" STICKINESS = "stickiness" HEAT_MAP = "heatMap" - INSIGHTS = "insights" class MetricOfErrors(str, Enum): @@ -1197,31 +1196,6 @@ class CardHeatMap(__CardSchema): return self -class MetricOfInsights(str, Enum): - ISSUE_CATEGORIES = "issueCategories" - - -class CardInsights(__CardSchema): - metric_type: Literal[MetricType.INSIGHTS] - metric_of: MetricOfInsights = Field(default=MetricOfInsights.ISSUE_CATEGORIES) - view_type: MetricOtherViewType = Field(...) - - @model_validator(mode="before") - @classmethod - def __enforce_default(cls, values): - values["view_type"] = MetricOtherViewType.LIST_CHART - return values - - @model_validator(mode="after") - def __transform(self): - self.metric_of = MetricOfInsights(self.metric_of) - return self - - @model_validator(mode="after") - def restrictions(self): - raise ValueError(f"metricType:{MetricType.INSIGHTS} not supported yet.") - - class CardPathAnalysisSeriesSchema(CardSeriesSchema): name: Optional[str] = Field(default=None) filter: PathAnalysisSchema = Field(...) @@ -1298,7 +1272,7 @@ __cards_union_base = Union[ CardErrors, CardWebVital, CardHeatMap, CardPathAnalysis] -CardSchema = ORUnion(Union[__cards_union_base, CardInsights], discriminator='metric_type') +CardSchema = ORUnion(__cards_union_base, discriminator='metric_type') class UpdateCardStatusSchema(BaseModel): diff --git a/ee/api/.gitignore b/ee/api/.gitignore index 9a62041d0..f1acae182 100644 --- a/ee/api/.gitignore +++ b/ee/api/.gitignore @@ -194,6 +194,9 @@ Pipfile.lock /chalicelib/core/collaboration_msteams.py /chalicelib/core/collaboration_slack.py /chalicelib/core/countries.py +/chalicelib/core/metrics.py +/chalicelib/core/metrics_ch.py +/chalicelib/core/custom_metrics.py /chalicelib/core/custom_metrics_predefined.py /chalicelib/core/dashboards.py /chalicelib/core/errors_favorite.py @@ -279,3 +282,7 @@ Pipfile.lock /chalicelib/core/unprocessed_sessions.py /run-db_init-dev.sh /.dev/ +/chalicelib/core/product_anaytics2.py +/chalicelib/utils/ch_client.py +/chalicelib/utils/ch_client_exp.py +/routers/subs/product_anaytics.py diff --git a/ee/api/Pipfile b/ee/api/Pipfile index a3a45e591..ee2617767 100644 --- a/ee/api/Pipfile +++ b/ee/api/Pipfile @@ -6,20 +6,21 @@ name = "pypi" [packages] urllib3 = "==1.26.16" requests = "==2.32.3" -boto3 = "==1.35.60" -pyjwt = "==2.9.0" +boto3 = "==1.35.76" +pyjwt = "==2.10.1" psycopg2-binary = "==2.9.10" psycopg = {extras = ["binary", "pool"], version = "==3.2.3"} +clickhouse-driver = {extras = ["lz4"], version = "==0.2.9"} +clickhouse-connect = "==0.8.9" elasticsearch = "==8.16.0" jira = "==3.8.0" cachetools = "==5.5.0" -fastapi = "==0.115.5" -uvicorn = {extras = ["standard"], version = "==0.32.0"} +fastapi = "==0.115.6" +uvicorn = {extras = ["standard"], version = "==0.32.1"} gunicorn = "==23.0.0" python-decouple = "==3.8" -pydantic = {extras = ["email"], version = "==2.9.2"} -apscheduler = "==3.10.4" -clickhouse-driver = {extras = ["lz4"], version = "==0.2.9"} +pydantic = {extras = ["email"], version = "==2.10.3"} +apscheduler = "==3.11.0" python3-saml = "==1.16.0" python-multipart = "==0.0.17" redis = "==5.2.0" diff --git a/ee/api/app.py b/ee/api/app.py index 7a24c7683..7ad085882 100644 --- a/ee/api/app.py +++ b/ee/api/app.py @@ -17,11 +17,11 @@ from starlette.responses import StreamingResponse, JSONResponse from chalicelib.core import traces from chalicelib.utils import events_queue from chalicelib.utils import helper -from chalicelib.utils import pg_client +from chalicelib.utils import pg_client, ch_client from crons import core_crons, ee_crons, core_dynamic_crons from routers import core, core_dynamic from routers import ee -from routers.subs import insights, metrics, v1_api, health, usability_tests, spot +from routers.subs import insights, metrics, v1_api, health, usability_tests, spot, product_anaytics from routers.subs import v1_api_ee if config("ENABLE_SSO", cast=bool, default=True): @@ -48,6 +48,7 @@ async def lifespan(app: FastAPI): app.schedule = AsyncIOScheduler() app.queue_system = queue.Queue() await pg_client.init() + await ch_client.init() await events_queue.init() app.schedule.start() @@ -149,6 +150,10 @@ app.include_router(spot.public_app) app.include_router(spot.app) app.include_router(spot.app_apikey) +app.include_router(product_anaytics.public_app) +app.include_router(product_anaytics.app) +app.include_router(product_anaytics.app_apikey) + if config("ENABLE_SSO", cast=bool, default=True): app.include_router(saml.public_app) app.include_router(saml.app) diff --git a/ee/api/chalicelib/core/__init__.py b/ee/api/chalicelib/core/__init__.py index 88c8528a7..1f0feb085 100644 --- a/ee/api/chalicelib/core/__init__.py +++ b/ee/api/chalicelib/core/__init__.py @@ -4,6 +4,10 @@ import logging logging.basicConfig(level=config("LOGLEVEL", default=logging.INFO)) from . import sessions as sessions_legacy +from . import custom_metrics as custom_metrics_legacy +from . import custom_metrics_ee as custom_metrics +from . import metrics_ch as metrics +from . import metrics as metrics_legacy if config("EXP_SESSIONS_SEARCH", cast=bool, default=False): logging.info(">>> Using experimental sessions search") diff --git a/ee/api/chalicelib/core/custom_metrics.py b/ee/api/chalicelib/core/custom_metrics.py deleted file mode 100644 index c8bbfef8b..000000000 --- a/ee/api/chalicelib/core/custom_metrics.py +++ /dev/null @@ -1,693 +0,0 @@ -import json -import logging - -from decouple import config -from fastapi import HTTPException, status - -import schemas -from chalicelib.core import funnels, issues, heatmaps, sessions_mobs, sessions_favorite, \ - product_analytics, custom_metrics_predefined -from chalicelib.utils import helper, pg_client -from chalicelib.utils.TimeUTC import TimeUTC -from chalicelib.utils.storage import extra - -if config("EXP_ERRORS_SEARCH", cast=bool, default=False): - logging.info(">>> Using experimental error search") - from . import errors_exp as errors -else: - from . import errors as errors - -if config("EXP_SESSIONS_SEARCH_METRIC", cast=bool, default=False): - from chalicelib.core import sessions -else: - from chalicelib.core import sessions_legacy as sessions - -logger = logging.getLogger(__name__) - - -# TODO: refactor this to split -# timeseries / -# table of errors / table of issues / table of browsers / table of devices / table of countries / table of URLs -# remove "table of" calls from this function -def __try_live(project_id, data: schemas.CardSchema): - results = [] - for i, s in enumerate(data.series): - results.append(sessions.search2_series(data=s.filter, project_id=project_id, density=data.density, - view_type=data.view_type, metric_type=data.metric_type, - metric_of=data.metric_of, metric_value=data.metric_value)) - - return results - - -def __get_table_of_series(project_id, data: schemas.CardSchema): - results = [] - for i, s in enumerate(data.series): - results.append(sessions.search2_table(data=s.filter, project_id=project_id, density=data.density, - metric_of=data.metric_of, metric_value=data.metric_value, - metric_format=data.metric_format)) - - return results - - -def __get_funnel_chart(project: schemas.ProjectContext, data: schemas.CardFunnel, user_id: int = None): - if len(data.series) == 0: - return { - "stages": [], - "totalDropDueToIssues": 0 - } - - # return funnels.get_top_insights_on_the_fly_widget(project_id=project_id, - # data=data.series[0].filter, - # metric_format=data.metric_format) - return funnels.get_simple_funnel(project=project, - data=data.series[0].filter, - metric_format=data.metric_format) - - -def __get_errors_list(project: schemas.ProjectContext, user_id, data: schemas.CardSchema): - if len(data.series) == 0: - return { - "total": 0, - "errors": [] - } - return errors.search(data.series[0].filter, project_id=project.project_id, user_id=user_id) - - -def __get_sessions_list(project: schemas.ProjectContext, user_id, data: schemas.CardSchema): - if len(data.series) == 0: - logger.debug("empty series") - return { - "total": 0, - "sessions": [] - } - return sessions.search_sessions(data=data.series[0].filter, project_id=project.project_id, user_id=user_id) - - -def __get_heat_map_chart(project: schemas.ProjectContext, user_id, data: schemas.CardHeatMap, - include_mobs: bool = True): - if len(data.series) == 0: - return None - data.series[0].filter.filters += data.series[0].filter.events - data.series[0].filter.events = [] - return heatmaps.search_short_session(project_id=project.project_id, user_id=user_id, - data=schemas.HeatMapSessionsSearch( - **data.series[0].filter.model_dump()), - include_mobs=include_mobs) - - -def __get_path_analysis_chart(project: schemas.ProjectContext, user_id: int, data: schemas.CardPathAnalysis): - if len(data.series) == 0: - data.series.append( - schemas.CardPathAnalysisSeriesSchema(startTimestamp=data.startTimestamp, endTimestamp=data.endTimestamp)) - elif not isinstance(data.series[0].filter, schemas.PathAnalysisSchema): - data.series[0].filter = schemas.PathAnalysisSchema() - - return product_analytics.path_analysis(project_id=project.project_id, data=data) - - -def __get_timeseries_chart(project: schemas.ProjectContext, data: schemas.CardTimeSeries, user_id: int = None): - series_charts = __try_live(project_id=project.project_id, data=data) - results = [{}] * len(series_charts[0]) - for i in range(len(results)): - for j, series_chart in enumerate(series_charts): - results[i] = {**results[i], "timestamp": series_chart[i]["timestamp"], - data.series[j].name if data.series[j].name else j + 1: series_chart[i]["count"]} - return results - - -def not_supported(**args): - raise Exception("not supported") - - -def __get_table_of_user_ids(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project.project_id, data=data) - - -def __get_table_of_sessions(project: schemas.ProjectContext, data: schemas.CardTable, user_id): - return __get_sessions_list(project=project, user_id=user_id, data=data) - - -def __get_table_of_errors(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int): - return __get_errors_list(project=project, user_id=user_id, data=data) - - -def __get_table_of_issues(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project.project_id, data=data) - - -def __get_table_of_browsers(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project.project_id, data=data) - - -def __get_table_of_devises(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project.project_id, data=data) - - -def __get_table_of_countries(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project.project_id, data=data) - - -def __get_table_of_urls(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project.project_id, data=data) - - -def __get_table_of_referrers(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project.project_id, data=data) - - -def __get_table_of_requests(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int = None): - return __get_table_of_series(project_id=project.project_id, data=data) - - -def __get_table_chart(project: schemas.ProjectContext, data: schemas.CardTable, user_id: int): - supported = { - schemas.MetricOfTable.SESSIONS: __get_table_of_sessions, - schemas.MetricOfTable.ERRORS: __get_table_of_errors, - schemas.MetricOfTable.USER_ID: __get_table_of_user_ids, - schemas.MetricOfTable.ISSUES: __get_table_of_issues, - schemas.MetricOfTable.USER_BROWSER: __get_table_of_browsers, - schemas.MetricOfTable.USER_DEVICE: __get_table_of_devises, - schemas.MetricOfTable.USER_COUNTRY: __get_table_of_countries, - schemas.MetricOfTable.VISITED_URL: __get_table_of_urls, - schemas.MetricOfTable.REFERRER: __get_table_of_referrers, - schemas.MetricOfTable.FETCH: __get_table_of_requests - } - return supported.get(data.metric_of, not_supported)(project=project, data=data, user_id=user_id) - - -def get_chart(project: schemas.ProjectContext, data: schemas.CardSchema, user_id: int): - if data.is_predefined: - return custom_metrics_predefined.get_metric(key=data.metric_of, - project_id=project.project_id, - data=data.model_dump()) - - supported = { - schemas.MetricType.TIMESERIES: __get_timeseries_chart, - schemas.MetricType.TABLE: __get_table_chart, - schemas.MetricType.HEAT_MAP: __get_heat_map_chart, - schemas.MetricType.FUNNEL: __get_funnel_chart, - schemas.MetricType.PATH_ANALYSIS: __get_path_analysis_chart - } - return supported.get(data.metric_type, not_supported)(project=project, data=data, user_id=user_id) - - -def get_sessions_by_card_id(project_id, user_id, metric_id, data: schemas.CardSessionsSchema): - # No need for this because UI is sending the full payload - # card: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, flatten=False) - # if card is None: - # return None - # metric: schemas.CardSchema = schemas.CardSchema(**card) - # metric: schemas.CardSchema = __merge_metric_with_data(metric=metric, data=data) - if not card_exists(metric_id=metric_id, project_id=project_id, user_id=user_id): - return None - results = [] - for s in data.series: - results.append({"seriesId": s.series_id, "seriesName": s.name, - **sessions.search_sessions(data=s.filter, project_id=project_id, user_id=user_id)}) - - return results - - -def get_sessions(project_id, user_id, data: schemas.CardSessionsSchema): - results = [] - if len(data.series) == 0: - return results - for s in data.series: - if len(data.filters) > 0: - s.filter.filters += data.filters - s.filter = schemas.SessionsSearchPayloadSchema(**s.filter.model_dump(by_alias=True)) - - results.append({"seriesId": None, "seriesName": s.name, - **sessions.search_sessions(data=s.filter, project_id=project_id, user_id=user_id)}) - - return results - - -def get_issues(project: schemas.ProjectContext, user_id: int, data: schemas.CardSchema): - if data.is_predefined: - return not_supported() - if data.metric_of == schemas.MetricOfTable.ISSUES: - return __get_table_of_issues(project=project, user_id=user_id, data=data) - supported = { - schemas.MetricType.TIMESERIES: not_supported, - schemas.MetricType.TABLE: not_supported, - schemas.MetricType.HEAT_MAP: not_supported, - schemas.MetricType.INSIGHTS: not_supported, - schemas.MetricType.PATH_ANALYSIS: not_supported, - } - return supported.get(data.metric_type, not_supported)() - - -def __get_path_analysis_card_info(data: schemas.CardPathAnalysis): - r = {"start_point": [s.model_dump() for s in data.start_point], - "start_type": data.start_type, - "excludes": [e.model_dump() for e in data.excludes], - "hideExcess": data.hide_excess} - return r - - -def create_card(project: schemas.ProjectContext, user_id, data: schemas.CardSchema, dashboard=False): - with pg_client.PostgresClient() as cur: - session_data = None - if data.metric_type == schemas.MetricType.HEAT_MAP: - if data.session_id is not None: - session_data = {"sessionId": data.session_id} - else: - session_data = __get_heat_map_chart(project=project, user_id=user_id, - data=data, include_mobs=False) - if session_data is not None: - session_data = {"sessionId": session_data["sessionId"]} - - if session_data is not None: - # for EE only - keys = sessions_mobs. \ - __get_mob_keys(project_id=project.project_id, session_id=session_data["sessionId"]) - keys += sessions_mobs. \ - __get_mob_keys_deprecated(session_id=session_data["sessionId"]) # To support old sessions - tag = config('RETENTION_L_VALUE', default='vault') - for k in keys: - try: - extra.tag_session(file_key=k, tag_value=tag) - except Exception as e: - logger.warning(f"!!!Error while tagging: {k} to {tag} for heatMap") - logger.error(str(e)) - - _data = {"session_data": json.dumps(session_data) if session_data is not None else None} - for i, s in enumerate(data.series): - for k in s.model_dump().keys(): - _data[f"{k}_{i}"] = s.__getattribute__(k) - _data[f"index_{i}"] = i - _data[f"filter_{i}"] = s.filter.json() - series_len = len(data.series) - params = {"user_id": user_id, "project_id": project.project_id, **data.model_dump(), **_data, - "default_config": json.dumps(data.default_config.model_dump()), "card_info": None} - if data.metric_type == schemas.MetricType.PATH_ANALYSIS: - params["card_info"] = json.dumps(__get_path_analysis_card_info(data=data)) - - query = """INSERT INTO metrics (project_id, user_id, name, is_public, - view_type, metric_type, metric_of, metric_value, - metric_format, default_config, thumbnail, data, - card_info) - VALUES (%(project_id)s, %(user_id)s, %(name)s, %(is_public)s, - %(view_type)s, %(metric_type)s, %(metric_of)s, %(metric_value)s, - %(metric_format)s, %(default_config)s, %(thumbnail)s, %(session_data)s, - %(card_info)s) - RETURNING metric_id""" - if len(data.series) > 0: - query = f"""WITH m AS ({query}) - INSERT INTO metric_series(metric_id, index, name, filter) - VALUES {",".join([f"((SELECT metric_id FROM m), %(index_{i})s, %(name_{i})s, %(filter_{i})s::jsonb)" - for i in range(series_len)])} - RETURNING metric_id;""" - - query = cur.mogrify(query, params) - cur.execute(query) - r = cur.fetchone() - if dashboard: - return r["metric_id"] - return {"data": get_card(metric_id=r["metric_id"], project_id=project.project_id, user_id=user_id)} - - -def update_card(metric_id, user_id, project_id, data: schemas.CardSchema): - metric: dict = get_card(metric_id=metric_id, project_id=project_id, - user_id=user_id, flatten=False, include_data=True) - if metric is None: - return None - series_ids = [r["seriesId"] for r in metric["series"]] - n_series = [] - d_series_ids = [] - u_series = [] - u_series_ids = [] - params = {"metric_id": metric_id, "is_public": data.is_public, "name": data.name, - "user_id": user_id, "project_id": project_id, "view_type": data.view_type, - "metric_type": data.metric_type, "metric_of": data.metric_of, - "metric_value": data.metric_value, "metric_format": data.metric_format, - "config": json.dumps(data.default_config.model_dump()), "thumbnail": data.thumbnail} - for i, s in enumerate(data.series): - prefix = "u_" - if s.index is None: - s.index = i - if s.series_id is None or s.series_id not in series_ids: - n_series.append({"i": i, "s": s}) - prefix = "n_" - else: - u_series.append({"i": i, "s": s}) - u_series_ids.append(s.series_id) - ns = s.model_dump() - for k in ns.keys(): - if k == "filter": - ns[k] = json.dumps(ns[k]) - params[f"{prefix}{k}_{i}"] = ns[k] - for i in series_ids: - if i not in u_series_ids: - d_series_ids.append(i) - params["d_series_ids"] = tuple(d_series_ids) - params["card_info"] = None - params["session_data"] = json.dumps(metric["data"]) - if data.metric_type == schemas.MetricType.PATH_ANALYSIS: - params["card_info"] = json.dumps(__get_path_analysis_card_info(data=data)) - elif data.metric_type == schemas.MetricType.HEAT_MAP: - if data.session_id is not None: - params["session_data"] = json.dumps({"sessionId": data.session_id}) - elif metric.get("data") and metric["data"].get("sessionId"): - params["session_data"] = json.dumps({"sessionId": metric["data"]["sessionId"]}) - - with pg_client.PostgresClient() as cur: - sub_queries = [] - if len(n_series) > 0: - sub_queries.append(f"""\ - n AS (INSERT INTO metric_series (metric_id, index, name, filter) - VALUES {",".join([f"(%(metric_id)s, %(n_index_{s['i']})s, %(n_name_{s['i']})s, %(n_filter_{s['i']})s::jsonb)" - for s in n_series])} - RETURNING 1)""") - if len(u_series) > 0: - sub_queries.append(f"""\ - u AS (UPDATE metric_series - SET name=series.name, - filter=series.filter, - index=series.index - FROM (VALUES {",".join([f"(%(u_series_id_{s['i']})s,%(u_index_{s['i']})s,%(u_name_{s['i']})s,%(u_filter_{s['i']})s::jsonb)" - for s in u_series])}) AS series(series_id, index, name, filter) - WHERE metric_series.metric_id =%(metric_id)s AND metric_series.series_id=series.series_id - RETURNING 1)""") - if len(d_series_ids) > 0: - sub_queries.append("""\ - d AS (DELETE FROM metric_series WHERE metric_id =%(metric_id)s AND series_id IN %(d_series_ids)s - RETURNING 1)""") - query = cur.mogrify(f"""\ - {"WITH " if len(sub_queries) > 0 else ""}{",".join(sub_queries)} - UPDATE metrics - SET name = %(name)s, is_public= %(is_public)s, - view_type= %(view_type)s, metric_type= %(metric_type)s, - metric_of= %(metric_of)s, metric_value= %(metric_value)s, - metric_format= %(metric_format)s, - edited_at = timezone('utc'::text, now()), - default_config = %(config)s, - thumbnail = %(thumbnail)s, - card_info = %(card_info)s, - data = %(session_data)s - WHERE metric_id = %(metric_id)s - AND project_id = %(project_id)s - AND (user_id = %(user_id)s OR is_public) - RETURNING metric_id;""", params) - cur.execute(query) - return get_card(metric_id=metric_id, project_id=project_id, user_id=user_id) - - -def search_all(project_id, user_id, data: schemas.SearchCardsSchema, include_series=False): - constraints = ["metrics.project_id = %(project_id)s", - "metrics.deleted_at ISNULL"] - params = {"project_id": project_id, "user_id": user_id, - "offset": (data.page - 1) * data.limit, - "limit": data.limit, } - if data.mine_only: - constraints.append("user_id = %(user_id)s") - else: - constraints.append("(user_id = %(user_id)s OR metrics.is_public)") - if data.shared_only: - constraints.append("is_public") - - if data.query is not None and len(data.query) > 0: - constraints.append("(name ILIKE %(query)s OR owner.owner_email ILIKE %(query)s)") - params["query"] = helper.values_for_operator(value=data.query, - op=schemas.SearchEventOperator.CONTAINS) - with pg_client.PostgresClient() as cur: - sub_join = "" - if include_series: - sub_join = """LEFT JOIN LATERAL (SELECT COALESCE(jsonb_agg(metric_series.* ORDER BY index),'[]'::jsonb) AS series - FROM metric_series - WHERE metric_series.metric_id = metrics.metric_id - AND metric_series.deleted_at ISNULL - ) AS metric_series ON (TRUE)""" - query = cur.mogrify( - f"""SELECT metric_id, project_id, user_id, name, is_public, created_at, edited_at, - metric_type, metric_of, metric_format, metric_value, view_type, is_pinned, - dashboards, owner_email, owner_name, default_config AS config, thumbnail - FROM metrics - {sub_join} - LEFT JOIN LATERAL (SELECT COALESCE(jsonb_agg(connected_dashboards.* ORDER BY is_public,name),'[]'::jsonb) AS dashboards - FROM (SELECT DISTINCT dashboard_id, name, is_public - FROM dashboards INNER JOIN dashboard_widgets USING (dashboard_id) - WHERE deleted_at ISNULL - AND dashboard_widgets.metric_id = metrics.metric_id - AND project_id = %(project_id)s - AND ((dashboards.user_id = %(user_id)s OR is_public))) AS connected_dashboards - ) AS connected_dashboards ON (TRUE) - LEFT JOIN LATERAL (SELECT email AS owner_email, name AS owner_name - FROM users - WHERE deleted_at ISNULL - AND users.user_id = metrics.user_id - ) AS owner ON (TRUE) - WHERE {" AND ".join(constraints)} - ORDER BY created_at {data.order.value} - LIMIT %(limit)s OFFSET %(offset)s;""", params) - logger.debug("---------") - logger.debug(query) - logger.debug("---------") - cur.execute(query) - rows = cur.fetchall() - if include_series: - for r in rows: - for s in r["series"]: - s["filter"] = helper.old_search_payload_to_flat(s["filter"]) - else: - for r in rows: - r["created_at"] = TimeUTC.datetime_to_timestamp(r["created_at"]) - r["edited_at"] = TimeUTC.datetime_to_timestamp(r["edited_at"]) - rows = helper.list_to_camel_case(rows) - return rows - - -def get_all(project_id, user_id): - default_search = schemas.SearchCardsSchema() - rows = search_all(project_id=project_id, user_id=user_id, data=default_search) - result = rows - while len(rows) == default_search.limit: - default_search.page += 1 - rows = search_all(project_id=project_id, user_id=user_id, data=default_search) - result += rows - - return result - - -def delete_card(project_id, metric_id, user_id): - with pg_client.PostgresClient() as cur: - cur.execute( - cur.mogrify("""\ - UPDATE public.metrics - SET deleted_at = timezone('utc'::text, now()), edited_at = timezone('utc'::text, now()) - WHERE project_id = %(project_id)s - AND metric_id = %(metric_id)s - AND (user_id = %(user_id)s OR is_public) - RETURNING data;""", - {"metric_id": metric_id, "project_id": project_id, "user_id": user_id}) - ) - # for EE only - row = cur.fetchone() - if row: - if row["data"] and not sessions_favorite.favorite_session_exists(session_id=row["data"]["sessionId"]): - keys = sessions_mobs. \ - __get_mob_keys(project_id=project_id, session_id=row["data"]["sessionId"]) - keys += sessions_mobs. \ - __get_mob_keys_deprecated(session_id=row["data"]["sessionId"]) # To support old sessions - tag = config('RETENTION_D_VALUE', default='default') - for k in keys: - try: - extra.tag_session(file_key=k, tag_value=tag) - except Exception as e: - logger.warning(f"!!!Error while tagging: {k} to {tag} for heatMap") - logger.error(str(e)) - return {"state": "success"} - - -def __get_path_analysis_attributes(row): - card_info = row.pop("cardInfo") - row["excludes"] = card_info.get("excludes", []) - row["startPoint"] = card_info.get("startPoint", []) - row["startType"] = card_info.get("startType", "start") - row["hideExcess"] = card_info.get("hideExcess", False) - return row - - -def get_card(metric_id, project_id, user_id, flatten: bool = True, include_data: bool = False): - with pg_client.PostgresClient() as cur: - query = cur.mogrify( - f"""SELECT metric_id, project_id, user_id, name, is_public, created_at, deleted_at, edited_at, metric_type, - view_type, metric_of, metric_value, metric_format, is_pinned, default_config, - default_config AS config,series, dashboards, owner_email, card_info - {',data' if include_data else ''} - FROM metrics - LEFT JOIN LATERAL (SELECT COALESCE(jsonb_agg(metric_series.* ORDER BY index),'[]'::jsonb) AS series - FROM metric_series - WHERE metric_series.metric_id = metrics.metric_id - AND metric_series.deleted_at ISNULL - ) AS metric_series ON (TRUE) - LEFT JOIN LATERAL (SELECT COALESCE(jsonb_agg(connected_dashboards.* ORDER BY is_public,name),'[]'::jsonb) AS dashboards - FROM (SELECT dashboard_id, name, is_public - FROM dashboards INNER JOIN dashboard_widgets USING (dashboard_id) - WHERE deleted_at ISNULL - AND project_id = %(project_id)s - AND ((dashboards.user_id = %(user_id)s OR is_public)) - AND metric_id = %(metric_id)s) AS connected_dashboards - ) AS connected_dashboards ON (TRUE) - LEFT JOIN LATERAL (SELECT email AS owner_email - FROM users - WHERE deleted_at ISNULL - AND users.user_id = metrics.user_id - ) AS owner ON (TRUE) - WHERE metrics.project_id = %(project_id)s - AND metrics.deleted_at ISNULL - AND (metrics.user_id = %(user_id)s OR metrics.is_public) - AND metrics.metric_id = %(metric_id)s - ORDER BY created_at;""", - {"metric_id": metric_id, "project_id": project_id, "user_id": user_id} - ) - cur.execute(query) - row = cur.fetchone() - if row is None: - return None - row["created_at"] = TimeUTC.datetime_to_timestamp(row["created_at"]) - row["edited_at"] = TimeUTC.datetime_to_timestamp(row["edited_at"]) - if flatten: - for s in row["series"]: - s["filter"] = helper.old_search_payload_to_flat(s["filter"]) - row = helper.dict_to_camel_case(row) - if row["metricType"] == schemas.MetricType.PATH_ANALYSIS: - row = __get_path_analysis_attributes(row=row) - return row - - -def get_series_for_alert(project_id, user_id): - with pg_client.PostgresClient() as cur: - cur.execute( - cur.mogrify( - """SELECT series_id AS value, - metrics.name || '.' || (COALESCE(metric_series.name, 'series ' || index)) || '.count' AS name, - 'count' AS unit, - FALSE AS predefined, - metric_id, - series_id - FROM metric_series - INNER JOIN metrics USING (metric_id) - WHERE metrics.deleted_at ISNULL - AND metrics.project_id = %(project_id)s - AND metrics.metric_type = 'timeseries' - AND (user_id = %(user_id)s OR is_public) - ORDER BY name;""", - {"project_id": project_id, "user_id": user_id} - ) - ) - rows = cur.fetchall() - return helper.list_to_camel_case(rows) - - -def change_state(project_id, metric_id, user_id, status): - with pg_client.PostgresClient() as cur: - cur.execute( - cur.mogrify("""\ - UPDATE public.metrics - SET active = %(status)s - WHERE metric_id = %(metric_id)s - AND (user_id = %(user_id)s OR is_public);""", - {"metric_id": metric_id, "status": status, "user_id": user_id}) - ) - return get_card(metric_id=metric_id, project_id=project_id, user_id=user_id) - - -def get_funnel_sessions_by_issue(user_id, project_id, metric_id, issue_id, - data: schemas.CardSessionsSchema - # , range_value=None, start_date=None, end_date=None - ): - # No need for this because UI is sending the full payload - # card: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, flatten=False) - # if card is None: - # return None - # metric: schemas.CardSchema = schemas.CardSchema(**card) - # metric: schemas.CardSchema = __merge_metric_with_data(metric=metric, data=data) - # if metric is None: - # return None - if not card_exists(metric_id=metric_id, project_id=project_id, user_id=user_id): - return None - for s in data.series: - s.filter.startTimestamp = data.startTimestamp - s.filter.endTimestamp = data.endTimestamp - s.filter.limit = data.limit - s.filter.page = data.page - issues_list = funnels.get_issues_on_the_fly_widget(project_id=project_id, data=s.filter).get("issues", {}) - issues_list = issues_list.get("significant", []) + issues_list.get("insignificant", []) - issue = None - for i in issues_list: - if i.get("issueId", "") == issue_id: - issue = i - break - if issue is None: - issue = issues.get(project_id=project_id, issue_id=issue_id) - if issue is not None: - issue = {**issue, - "affectedSessions": 0, - "affectedUsers": 0, - "conversionImpact": 0, - "lostConversions": 0, - "unaffectedSessions": 0} - return {"seriesId": s.series_id, "seriesName": s.name, - "sessions": sessions.search_sessions(user_id=user_id, project_id=project_id, - issue=issue, data=s.filter) - if issue is not None else {"total": 0, "sessions": []}, - "issue": issue} - - -def make_chart_from_card(project: schemas.ProjectContext, user_id, metric_id, data: schemas.CardSessionsSchema): - raw_metric: dict = get_card(metric_id=metric_id, project_id=project.project_id, user_id=user_id, include_data=True) - - if raw_metric is None: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="card not found") - raw_metric["startTimestamp"] = data.startTimestamp - raw_metric["endTimestamp"] = data.endTimestamp - raw_metric["limit"] = data.limit - raw_metric["density"] = data.density - metric: schemas.CardSchema = schemas.CardSchema(**raw_metric) - - if metric.is_predefined: - return custom_metrics_predefined.get_metric(key=metric.metric_of, - project_id=project.project_id, - data=data.model_dump()) - elif metric.metric_type == schemas.MetricType.HEAT_MAP: - if raw_metric["data"] and raw_metric["data"].get("sessionId"): - return heatmaps.get_selected_session(project_id=project.project_id, - session_id=raw_metric["data"]["sessionId"]) - else: - return heatmaps.search_short_session(project_id=project.project_id, - data=schemas.HeatMapSessionsSearch(**metric.model_dump()), - user_id=user_id) - - return get_chart(project=project, data=metric, user_id=user_id) - - -def card_exists(metric_id, project_id, user_id) -> bool: - with pg_client.PostgresClient() as cur: - query = cur.mogrify( - f"""SELECT 1 - FROM metrics - LEFT JOIN LATERAL (SELECT COALESCE(jsonb_agg(connected_dashboards.* ORDER BY is_public,name),'[]'::jsonb) AS dashboards - FROM (SELECT dashboard_id, name, is_public - FROM dashboards INNER JOIN dashboard_widgets USING (dashboard_id) - WHERE deleted_at ISNULL - AND project_id = %(project_id)s - AND ((dashboards.user_id = %(user_id)s OR is_public)) - AND metric_id = %(metric_id)s) AS connected_dashboards - ) AS connected_dashboards ON (TRUE) - LEFT JOIN LATERAL (SELECT email AS owner_email - FROM users - WHERE deleted_at ISNULL - AND users.user_id = metrics.user_id - ) AS owner ON (TRUE) - WHERE metrics.project_id = %(project_id)s - AND metrics.deleted_at ISNULL - AND (metrics.user_id = %(user_id)s OR metrics.is_public) - AND metrics.metric_id = %(metric_id)s - ORDER BY created_at;""", - {"metric_id": metric_id, "project_id": project_id, "user_id": user_id} - ) - cur.execute(query) - row = cur.fetchone() - return row is not None diff --git a/ee/api/chalicelib/core/custom_metrics_ee.py b/ee/api/chalicelib/core/custom_metrics_ee.py new file mode 100644 index 000000000..d80a97b9d --- /dev/null +++ b/ee/api/chalicelib/core/custom_metrics_ee.py @@ -0,0 +1,234 @@ +import json +import logging + +from decouple import config +from fastapi import HTTPException, status +from .custom_metrics import * +import schemas +from chalicelib.core import funnels, issues, heatmaps, sessions_mobs, sessions_favorite, \ + product_analytics, custom_metrics_predefined +from chalicelib.utils import helper, pg_client +from chalicelib.utils.TimeUTC import TimeUTC +from chalicelib.utils.storage import extra + +if config("EXP_ERRORS_SEARCH", cast=bool, default=False): + logging.info(">>> Using experimental error search") + from . import errors_exp as errors +else: + from . import errors as errors + +if config("EXP_SESSIONS_SEARCH_METRIC", cast=bool, default=False): + from chalicelib.core import sessions +else: + from chalicelib.core import sessions_legacy as sessions + +logger = logging.getLogger(__name__) + + +# TODO: refactor this to split +# timeseries / +# table of errors / table of issues / table of browsers / table of devices / table of countries / table of URLs +# remove "table of" calls from this function +def __try_live(project_id, data: schemas.CardSchema): + results = [] + for i, s in enumerate(data.series): + results.append(sessions.search2_series(data=s.filter, project_id=project_id, density=data.density, + view_type=data.view_type, metric_type=data.metric_type, + metric_of=data.metric_of, metric_value=data.metric_value)) + + return results + + +def __get_table_of_series(project_id, data: schemas.CardSchema): + results = [] + for i, s in enumerate(data.series): + results.append(sessions.search2_table(data=s.filter, project_id=project_id, density=data.density, + metric_of=data.metric_of, metric_value=data.metric_value, + metric_format=data.metric_format)) + + return results + + +def __get_errors_list(project: schemas.ProjectContext, user_id, data: schemas.CardSchema): + if len(data.series) == 0: + return { + "total": 0, + "errors": [] + } + return errors.search(data.series[0].filter, project_id=project.project_id, user_id=user_id) + + +def __get_sessions_list(project: schemas.ProjectContext, user_id, data: schemas.CardSchema): + if len(data.series) == 0: + logger.debug("empty series") + return { + "total": 0, + "sessions": [] + } + return sessions.search_sessions(data=data.series[0].filter, project_id=project.project_id, user_id=user_id) + + +def get_sessions_by_card_id(project_id, user_id, metric_id, data: schemas.CardSessionsSchema): + # No need for this because UI is sending the full payload + # card: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, flatten=False) + # if card is None: + # return None + # metric: schemas.CardSchema = schemas.CardSchema(**card) + # metric: schemas.CardSchema = __merge_metric_with_data(metric=metric, data=data) + if not card_exists(metric_id=metric_id, project_id=project_id, user_id=user_id): + return None + results = [] + for s in data.series: + results.append({"seriesId": s.series_id, "seriesName": s.name, + **sessions.search_sessions(data=s.filter, project_id=project_id, user_id=user_id)}) + + return results + + +def get_sessions(project_id, user_id, data: schemas.CardSessionsSchema): + results = [] + if len(data.series) == 0: + return results + for s in data.series: + if len(data.filters) > 0: + s.filter.filters += data.filters + s.filter = schemas.SessionsSearchPayloadSchema(**s.filter.model_dump(by_alias=True)) + + results.append({"seriesId": None, "seriesName": s.name, + **sessions.search_sessions(data=s.filter, project_id=project_id, user_id=user_id)}) + + return results + + +def create_card(project: schemas.ProjectContext, user_id, data: schemas.CardSchema, dashboard=False): + with pg_client.PostgresClient() as cur: + session_data = None + if data.metric_type == schemas.MetricType.HEAT_MAP: + if data.session_id is not None: + session_data = {"sessionId": data.session_id} + else: + session_data = __get_heat_map_chart(project=project, user_id=user_id, + data=data, include_mobs=False) + if session_data is not None: + session_data = {"sessionId": session_data["sessionId"]} + + if session_data is not None: + # for EE only + keys = sessions_mobs. \ + __get_mob_keys(project_id=project.project_id, session_id=session_data["sessionId"]) + keys += sessions_mobs. \ + __get_mob_keys_deprecated(session_id=session_data["sessionId"]) # To support old sessions + tag = config('RETENTION_L_VALUE', default='vault') + for k in keys: + try: + extra.tag_session(file_key=k, tag_value=tag) + except Exception as e: + logger.warning(f"!!!Error while tagging: {k} to {tag} for heatMap") + logger.error(str(e)) + + _data = {"session_data": json.dumps(session_data) if session_data is not None else None} + for i, s in enumerate(data.series): + for k in s.model_dump().keys(): + _data[f"{k}_{i}"] = s.__getattribute__(k) + _data[f"index_{i}"] = i + _data[f"filter_{i}"] = s.filter.json() + series_len = len(data.series) + params = {"user_id": user_id, "project_id": project.project_id, **data.model_dump(), **_data, + "default_config": json.dumps(data.default_config.model_dump()), "card_info": None} + if data.metric_type == schemas.MetricType.PATH_ANALYSIS: + params["card_info"] = json.dumps(__get_path_analysis_card_info(data=data)) + + query = """INSERT INTO metrics (project_id, user_id, name, is_public, + view_type, metric_type, metric_of, metric_value, + metric_format, default_config, thumbnail, data, + card_info) + VALUES (%(project_id)s, %(user_id)s, %(name)s, %(is_public)s, + %(view_type)s, %(metric_type)s, %(metric_of)s, %(metric_value)s, + %(metric_format)s, %(default_config)s, %(thumbnail)s, %(session_data)s, + %(card_info)s) + RETURNING metric_id""" + if len(data.series) > 0: + query = f"""WITH m AS ({query}) + INSERT INTO metric_series(metric_id, index, name, filter) + VALUES {",".join([f"((SELECT metric_id FROM m), %(index_{i})s, %(name_{i})s, %(filter_{i})s::jsonb)" + for i in range(series_len)])} + RETURNING metric_id;""" + + query = cur.mogrify(query, params) + cur.execute(query) + r = cur.fetchone() + if dashboard: + return r["metric_id"] + return {"data": get_card(metric_id=r["metric_id"], project_id=project.project_id, user_id=user_id)} + + +def delete_card(project_id, metric_id, user_id): + with pg_client.PostgresClient() as cur: + cur.execute( + cur.mogrify("""\ + UPDATE public.metrics + SET deleted_at = timezone('utc'::text, now()), edited_at = timezone('utc'::text, now()) + WHERE project_id = %(project_id)s + AND metric_id = %(metric_id)s + AND (user_id = %(user_id)s OR is_public) + RETURNING data;""", + {"metric_id": metric_id, "project_id": project_id, "user_id": user_id}) + ) + # for EE only + row = cur.fetchone() + if row: + if row["data"] and not sessions_favorite.favorite_session_exists(session_id=row["data"]["sessionId"]): + keys = sessions_mobs. \ + __get_mob_keys(project_id=project_id, session_id=row["data"]["sessionId"]) + keys += sessions_mobs. \ + __get_mob_keys_deprecated(session_id=row["data"]["sessionId"]) # To support old sessions + tag = config('RETENTION_D_VALUE', default='default') + for k in keys: + try: + extra.tag_session(file_key=k, tag_value=tag) + except Exception as e: + logger.warning(f"!!!Error while tagging: {k} to {tag} for heatMap") + logger.error(str(e)) + return {"state": "success"} + + +def get_funnel_sessions_by_issue(user_id, project_id, metric_id, issue_id, + data: schemas.CardSessionsSchema + # , range_value=None, start_date=None, end_date=None + ): + # No need for this because UI is sending the full payload + # card: dict = get_card(metric_id=metric_id, project_id=project_id, user_id=user_id, flatten=False) + # if card is None: + # return None + # metric: schemas.CardSchema = schemas.CardSchema(**card) + # metric: schemas.CardSchema = __merge_metric_with_data(metric=metric, data=data) + # if metric is None: + # return None + if not card_exists(metric_id=metric_id, project_id=project_id, user_id=user_id): + return None + for s in data.series: + s.filter.startTimestamp = data.startTimestamp + s.filter.endTimestamp = data.endTimestamp + s.filter.limit = data.limit + s.filter.page = data.page + issues_list = funnels.get_issues_on_the_fly_widget(project_id=project_id, data=s.filter).get("issues", {}) + issues_list = issues_list.get("significant", []) + issues_list.get("insignificant", []) + issue = None + for i in issues_list: + if i.get("issueId", "") == issue_id: + issue = i + break + if issue is None: + issue = issues.get(project_id=project_id, issue_id=issue_id) + if issue is not None: + issue = {**issue, + "affectedSessions": 0, + "affectedUsers": 0, + "conversionImpact": 0, + "lostConversions": 0, + "unaffectedSessions": 0} + return {"seriesId": s.series_id, "seriesName": s.name, + "sessions": sessions.search_sessions(user_id=user_id, project_id=project_id, + issue=issue, data=s.filter) + if issue is not None else {"total": 0, "sessions": []}, + "issue": issue} diff --git a/ee/api/chalicelib/core/metrics_ch.py b/ee/api/chalicelib/core/metrics_ch.py new file mode 100644 index 000000000..13fb95fa4 --- /dev/null +++ b/ee/api/chalicelib/core/metrics_ch.py @@ -0,0 +1,614 @@ +import logging +from math import isnan + +import schemas +from chalicelib.utils import ch_client +from chalicelib.utils import exp_ch_helper +from chalicelib.utils import helper +from chalicelib.utils.TimeUTC import TimeUTC +from chalicelib.utils.metrics_helper import __get_step_size + +logger = logging.getLogger(__name__) + + +def __get_basic_constraints(table_name=None, time_constraint=True, round_start=False, data={}, identifier="project_id"): + if table_name: + table_name += "." + else: + table_name = "" + ch_sub_query = [f"{table_name}{identifier} =toUInt16(%({identifier})s)"] + if time_constraint: + if round_start: + ch_sub_query.append( + f"toStartOfInterval({table_name}datetime, INTERVAL %(step_size)s second) >= toDateTime(%(startTimestamp)s/1000)") + else: + ch_sub_query.append(f"{table_name}datetime >= toDateTime(%(startTimestamp)s/1000)") + ch_sub_query.append(f"{table_name}datetime < toDateTime(%(endTimestamp)s/1000)") + return ch_sub_query + __get_generic_constraint(data=data, table_name=table_name) + + +def __frange(start, stop, step): + result = [] + i = start + while i < stop: + result.append(i) + i += step + return result + + +def __add_missing_keys(original, complete): + for missing in [key for key in complete.keys() if key not in original.keys()]: + original[missing] = complete[missing] + return original + + +def __complete_missing_steps(start_time, end_time, density, neutral, rows, time_key="timestamp", time_coefficient=1000): + if len(rows) == density: + return rows + step = __get_step_size(start_time, end_time, density, decimal=True) + optimal = [(int(i * time_coefficient), int((i + step) * time_coefficient)) for i in + __frange(start_time // time_coefficient, end_time // time_coefficient, step)] + result = [] + r = 0 + o = 0 + for i in range(density): + neutral_clone = dict(neutral) + for k in neutral_clone.keys(): + if callable(neutral_clone[k]): + neutral_clone[k] = neutral_clone[k]() + if r < len(rows) and len(result) + len(rows) - r == density: + result += rows[r:] + break + if r < len(rows) and o < len(optimal) and rows[r][time_key] < optimal[o][0]: + # complete missing keys in original object + rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone) + result.append(rows[r]) + r += 1 + elif r < len(rows) and o < len(optimal) and optimal[o][0] <= rows[r][time_key] < optimal[o][1]: + # complete missing keys in original object + rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone) + result.append(rows[r]) + r += 1 + o += 1 + else: + neutral_clone[time_key] = optimal[o][0] + result.append(neutral_clone) + o += 1 + # elif r < len(rows) and rows[r][time_key] >= optimal[o][1]: + # neutral_clone[time_key] = optimal[o][0] + # result.append(neutral_clone) + # o += 1 + # else: + # neutral_clone[time_key] = optimal[o][0] + # result.append(neutral_clone) + # o += 1 + return result + + +def __get_constraint(data, fields, table_name): + constraints = [] + # for k in fields.keys(): + for i, f in enumerate(data.get("filters", [])): + if f["key"] in fields.keys(): + if f["value"] in ["*", ""]: + constraints.append(f"isNotNull({table_name}{fields[f['key']]})") + else: + constraints.append(f"{table_name}{fields[f['key']]} = %({f['key']}_{i})s") + # TODO: remove this in next release + offset = len(data.get("filters", [])) + for i, f in enumerate(data.keys()): + if f in fields.keys(): + if data[f] in ["*", ""]: + constraints.append(f"isNotNull({table_name}{fields[f]})") + else: + constraints.append(f"{table_name}{fields[f]} = %({f}_{i + offset})s") + return constraints + + +def __get_constraint_values(data): + params = {} + for i, f in enumerate(data.get("filters", [])): + params[f"{f['key']}_{i}"] = f["value"] + + # TODO: remove this in next release + offset = len(data.get("filters", [])) + for i, f in enumerate(data.keys()): + params[f"{f}_{i + offset}"] = data[f] + return params + + +METADATA_FIELDS = {"userId": "user_id", + "userAnonymousId": "user_anonymous_id", + "metadata1": "metadata_1", + "metadata2": "metadata_2", + "metadata3": "metadata_3", + "metadata4": "metadata_4", + "metadata5": "metadata_5", + "metadata6": "metadata_6", + "metadata7": "metadata_7", + "metadata8": "metadata_8", + "metadata9": "metadata_9", + "metadata10": "metadata_10"} + + +def __get_meta_constraint(data): + return __get_constraint(data=data, fields=METADATA_FIELDS, table_name="sessions_metadata.") + + +SESSIONS_META_FIELDS = {"revId": "rev_id", + "country": "user_country", + "os": "user_os", + "platform": "user_device_type", + "device": "user_device", + "browser": "user_browser"} + + +def __get_generic_constraint(data, table_name): + return __get_constraint(data=data, fields=SESSIONS_META_FIELDS, table_name=table_name) + + +def get_processed_sessions(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), + density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query = __get_basic_constraints(table_name="sessions", data=args) + ch_sub_query_chart = __get_basic_constraints(table_name="sessions", round_start=True, data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + ch_sub_query_chart += meta_condition + with ch_client.ClickHouseClient() as ch: + ch_query = f"""\ + SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COUNT(DISTINCT sessions.session_id) AS value + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;\ + """ + params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + + rows = ch.execute(query=ch_query, params=params) + + results = { + "value": sum([r["value"] for r in rows]), + "chart": __complete_missing_steps(rows=rows, start_time=startTimestamp, end_time=endTimestamp, + density=density, + neutral={"value": 0}) + } + + diff = endTimestamp - startTimestamp + endTimestamp = startTimestamp + startTimestamp = endTimestamp - diff + + ch_query = f""" SELECT COUNT(1) AS count + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query)};""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, + **__get_constraint_values(args)} + + count = ch.execute(query=ch_query, params=params) + + count = count[0]["count"] + + results["progress"] = helper.__progress(old_val=count, new_val=results["value"]) + results["unit"] = schemas.TemplatePredefinedUnits.COUNT + return results + + +def __get_domains_errors_neutral(rows): + neutral = {l: 0 for l in [i for k in [list(v.keys()) for v in rows] for i in k]} + if len(neutral.keys()) == 0: + neutral = {"All": 0} + return neutral + + +def __merge_rows_with_neutral(rows, neutral): + for i in range(len(rows)): + rows[i] = {**neutral, **rows[i]} + return rows + + +def __get_domains_errors_4xx_and_5xx(status, project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=6, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query = __get_basic_constraints(table_name="requests", round_start=True, data=args) + ch_sub_query.append("requests.event_type='REQUEST'") + ch_sub_query.append("intDiv(requests.status, 100) == %(status_code)s") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT timestamp, + groupArray([domain, toString(count)]) AS keys + FROM (SELECT toUnixTimestamp(toStartOfInterval(requests.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + requests.url_host AS domain, COUNT(1) AS count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests + WHERE {" AND ".join(ch_sub_query)} + GROUP BY timestamp,requests.url_host + ORDER BY timestamp, count DESC + LIMIT 5 BY timestamp) AS domain_stats + GROUP BY timestamp;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "step_size": step_size, + "status_code": status, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + rows = __nested_array_to_dict_array(rows) + neutral = __get_domains_errors_neutral(rows) + rows = __merge_rows_with_neutral(rows, neutral) + + return __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral=neutral) + + +def get_domains_errors_4xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=6, **args): + return __get_domains_errors_4xx_and_5xx(status=4, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, density=density, **args) + + +def get_domains_errors_5xx(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=6, **args): + return __get_domains_errors_4xx_and_5xx(status=5, project_id=project_id, startTimestamp=startTimestamp, + endTimestamp=endTimestamp, density=density, **args) + + +def __nested_array_to_dict_array(rows): + for r in rows: + for i in range(len(r["keys"])): + r[r["keys"][i][0]] = int(r["keys"][i][1]) + r.pop("keys") + return rows + + +def get_errors_per_domains(project_id, limit, page, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + ch_sub_query = __get_basic_constraints(table_name="requests", data=args) + ch_sub_query.append("requests.event_type = 'REQUEST'") + ch_sub_query.append("requests.success = 0") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + **__get_constraint_values(args), + "limit_s": (page - 1) * limit, + "limit": limit} + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT + requests.url_host AS domain, + COUNT(1) AS errors_count, + COUNT(1) OVER () AS total, + SUM(errors_count) OVER () AS count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests + WHERE {" AND ".join(ch_sub_query)} + GROUP BY requests.url_host + ORDER BY errors_count DESC + LIMIT %(limit)s OFFSET %(limit_s)s;""" + logger.debug("-----------") + logger.debug(ch.format(query=ch_query, params=params)) + logger.debug("-----------") + rows = ch.execute(query=ch_query, params=params) + response = {"count": 0, "total": 0, "values": []} + if len(rows) > 0: + response["count"] = rows[0]["count"] + response["total"] = rows[0]["total"] + rows = helper.list_to_camel_case(rows) + for r in rows: + r.pop("count") + r.pop("total") + + return response + + +def get_errors_per_type(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=TimeUTC.now(), + platform=None, density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="events", round_start=True, + data=args) + ch_sub_query_chart.append("(events.event_type = 'REQUEST' OR events.event_type = 'ERROR')") + ch_sub_query_chart.append("(events.status>200 OR events.event_type = 'ERROR')") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + SUM(events.event_type = 'REQUEST' AND intDiv(events.status, 100) == 4) AS _4xx, + SUM(events.event_type = 'REQUEST' AND intDiv(events.status, 100) == 5) AS _5xx, + SUM(events.event_type = 'ERROR' AND events.source == 'js_exception') AS js, + SUM(events.event_type = 'ERROR' AND events.source != 'js_exception') AS integrations + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS events + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(ch.format(query=ch_query, params=params)) + rows = ch.execute(query=ch_query, params=params) + rows = helper.list_to_camel_case(rows) + + return __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"4xx": 0, "5xx": 0, "js": 0, "integrations": 0}) + + +def get_impacted_sessions_by_js_errors(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query_chart = __get_basic_constraints(table_name="errors", round_start=True, data=args) + ch_sub_query_chart.append("errors.event_type='ERROR'") + ch_sub_query_chart.append("errors.source == 'js_exception'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(errors.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COUNT(DISTINCT errors.session_id) AS sessions_count, + COUNT(DISTINCT errors.error_id) AS errors_count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;;""" + rows = ch.execute(query=ch_query, + params={"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + ch_query = f"""SELECT COUNT(DISTINCT errors.session_id) AS sessions_count, + COUNT(DISTINCT errors.error_id) AS errors_count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS errors + WHERE {" AND ".join(ch_sub_query_chart)};""" + counts = ch.execute(query=ch_query, + params={"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)}) + return {"sessionsCount": counts[0]["sessions_count"], + "errorsCount": counts[0]["errors_count"], + "chart": helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"sessions_count": 0, + "errors_count": 0}))} + + +def get_resources_by_party(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query = __get_basic_constraints(table_name="requests", round_start=True, data=args) + ch_sub_query.append("requests.event_type='REQUEST'") + ch_sub_query.append("requests.success = 0") + sch_sub_query = ["rs.project_id =toUInt16(%(project_id)s)", "rs.event_type='REQUEST'"] + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + # sch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(sub_requests.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + SUM(first.url_host = sub_requests.url_host) AS first_party, + SUM(first.url_host != sub_requests.url_host) AS third_party + FROM + ( + SELECT requests.datetime, requests.url_host + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS requests + WHERE {" AND ".join(ch_sub_query)} + ) AS sub_requests + CROSS JOIN + ( + SELECT + rs.url_host, + COUNT(1) AS count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS rs + WHERE {" AND ".join(sch_sub_query)} + GROUP BY rs.url_host + ORDER BY count DESC + LIMIT 1 + ) AS first + GROUP BY timestamp + ORDER BY timestamp;""" + params = {"step_size": step_size, + "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + # print(ch.format(query=ch_query, params=params)) + rows = ch.execute(query=ch_query, params=params) + return helper.list_to_camel_case(__complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, + neutral={"first_party": 0, + "third_party": 0})) + + +def get_user_activity_avg_visited_pages(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + results = {} + + with ch_client.ClickHouseClient() as ch: + rows = __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args) + if len(rows) > 0: + results = helper.dict_to_camel_case(rows[0]) + for key in results: + if isnan(results[key]): + results[key] = 0 + results["chart"] = __get_user_activity_avg_visited_pages_chart(ch, project_id, startTimestamp, + endTimestamp, **args) + + diff = endTimestamp - startTimestamp + endTimestamp = startTimestamp + startTimestamp = endTimestamp - diff + rows = __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args) + + if len(rows) > 0: + previous = helper.dict_to_camel_case(rows[0]) + results["progress"] = helper.__progress(old_val=previous["value"], new_val=results["value"]) + results["unit"] = schemas.TemplatePredefinedUnits.COUNT + return results + + +def __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args): + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + ch_query = f"""SELECT COALESCE(CEIL(avgOrNull(count)),0) AS value + FROM (SELECT COUNT(1) AS count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)} + GROUP BY session_id) AS groupped_data + WHERE count>0;""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, + **__get_constraint_values(args)} + + rows = ch.execute(query=ch_query, params=params) + + return rows + + +def __get_user_activity_avg_visited_pages_chart(ch, project_id, startTimestamp, endTimestamp, density=20, **args): + step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density) + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + + params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + ch_query = f"""SELECT timestamp, COALESCE(avgOrNull(count), 0) AS value + FROM (SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, + session_id, COUNT(1) AS count + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp,session_id + ORDER BY timestamp) AS groupped_data + WHERE count>0 + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, params=params) + rows = __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral={"value": 0}) + return rows + + +def get_top_metrics_count_requests(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), value=None, density=20, **args): + step_size = __get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density) + ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args) + ch_sub_query_chart.append("pages.event_type='LOCATION'") + meta_condition = __get_meta_constraint(args) + ch_sub_query_chart += meta_condition + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + ch_sub_query += meta_condition + + if value is not None: + ch_sub_query.append("pages.url_path = %(value)s") + ch_sub_query_chart.append("pages.url_path = %(value)s") + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT COUNT(1) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)};""" + params = {"step_size": step_size, "project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, + "value": value, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + result = rows[0] + ch_query = f"""SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp, + COUNT(1) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;""" + rows = ch.execute(query=ch_query, params={**params, **__get_constraint_values(args)}) + rows = __complete_missing_steps(rows=rows, start_time=startTimestamp, + end_time=endTimestamp, + density=density, neutral={"value": 0}) + result["chart"] = rows + result["unit"] = schemas.TemplatePredefinedUnits.COUNT + return helper.dict_to_camel_case(result) + + +def get_unique_users(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), + density=7, **args): + step_size = __get_step_size(startTimestamp, endTimestamp, density) + ch_sub_query = __get_basic_constraints(table_name="sessions", data=args) + ch_sub_query_chart = __get_basic_constraints(table_name="sessions", round_start=True, data=args) + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + ch_sub_query_chart += meta_condition + ch_sub_query_chart.append("isNotNull(sessions.user_id)") + ch_sub_query_chart.append("sessions.user_id!=''") + with ch_client.ClickHouseClient() as ch: + ch_query = f"""\ + SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp, + COUNT(DISTINCT sessions.user_id) AS value + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query_chart)} + GROUP BY timestamp + ORDER BY timestamp;\ + """ + params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + + rows = ch.execute(query=ch_query, params=params) + + results = { + "value": sum([r["value"] for r in rows]), + "chart": __complete_missing_steps(rows=rows, start_time=startTimestamp, end_time=endTimestamp, + density=density, + neutral={"value": 0}) + } + + diff = endTimestamp - startTimestamp + endTimestamp = startTimestamp + startTimestamp = endTimestamp - diff + + ch_query = f""" SELECT COUNT(DISTINCT user_id) AS count + FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions + WHERE {" AND ".join(ch_sub_query)};""" + params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp, + **__get_constraint_values(args)} + + count = ch.execute(query=ch_query, params=params) + + count = count[0]["count"] + + results["progress"] = helper.__progress(old_val=count, new_val=results["value"]) + results["unit"] = schemas.TemplatePredefinedUnits.COUNT + return results + + +def get_speed_index_location(project_id, startTimestamp=TimeUTC.now(delta_days=-1), + endTimestamp=TimeUTC.now(), **args): + ch_sub_query = __get_basic_constraints(table_name="pages", data=args) + ch_sub_query.append("pages.event_type='LOCATION'") + ch_sub_query.append("isNotNull(pages.speed_index)") + ch_sub_query.append("pages.speed_index>0") + meta_condition = __get_meta_constraint(args) + ch_sub_query += meta_condition + + with ch_client.ClickHouseClient() as ch: + ch_query = f"""SELECT sessions.user_country, COALESCE(avgOrNull(pages.speed_index),0) AS value + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + INNER JOIN {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions USING (session_id) + WHERE {" AND ".join(ch_sub_query)} + GROUP BY sessions.user_country + ORDER BY value ,sessions.user_country;""" + params = {"project_id": project_id, + "startTimestamp": startTimestamp, + "endTimestamp": endTimestamp, **__get_constraint_values(args)} + rows = ch.execute(query=ch_query, params=params) + ch_query = f"""SELECT COALESCE(avgOrNull(pages.speed_index),0) AS avg + FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages + WHERE {" AND ".join(ch_sub_query)};""" + avg = ch.execute(query=ch_query, params=params)[0]["avg"] if len(rows) > 0 else 0 + return {"value": avg, "chart": helper.list_to_camel_case(rows), "unit": schemas.TemplatePredefinedUnits.MILLISECOND} diff --git a/ee/api/clean-dev.sh b/ee/api/clean-dev.sh index b952bc94b..a56afff7b 100755 --- a/ee/api/clean-dev.sh +++ b/ee/api/clean-dev.sh @@ -15,6 +15,9 @@ rm -rf ./chalicelib/core/collaboration_base.py rm -rf ./chalicelib/core/collaboration_msteams.py rm -rf ./chalicelib/core/collaboration_slack.py rm -rf ./chalicelib/core/countries.py +rm -rf ./chalicelib/core/metrics.py +rm -rf ./chalicelib/core/metrics_ch.py +rm -rf ./chalicelib/core/custom_metrics.py rm -rf ./chalicelib/core/custom_metrics_predefined.py rm -rf ./chalicelib/core/dashboards.py rm -rf ./chalicelib/core/errors_favorite.py @@ -97,4 +100,8 @@ rm -rf ./chalicelib/core/db_request_handler.py rm -rf ./chalicelib/utils/or_cache rm -rf ./routers/subs/health.py rm -rf ./chalicelib/core/spot.py -rm -rf ./chalicelib/core/unprocessed_sessions.py \ No newline at end of file +rm -rf ./chalicelib/core/unprocessed_sessions.py +rm -rf ./chalicelib/core/product_anaytics2.py +rm -rf ./chalicelib/utils/ch_client.py +rm -rf ./chalicelib/utils/ch_client_exp.py +rm -rf ./routers/subs/product_anaytics.py diff --git a/ee/api/env.default b/ee/api/env.default index 460062047..9d30b0e0b 100644 --- a/ee/api/env.default +++ b/ee/api/env.default @@ -12,6 +12,7 @@ captcha_server= CH_COMPRESSION=true ch_host= ch_port= +ch_port_http= ch_receive_timeout=10 ch_timeout=30 change_password_link=/reset-password?invitation=%s&&pass=%s @@ -83,4 +84,5 @@ SITE_URL= sourcemaps_bucket=sourcemaps sourcemaps_reader=http://sourcemapreader-openreplay.app.svc.cluster.local:9000/sourcemaps/{}/sourcemaps TRACE_PERIOD=300 -TZ=UTC \ No newline at end of file +TZ=UTC +EXP_CH_DRIVER=true \ No newline at end of file diff --git a/ee/api/requirements-alerts.txt b/ee/api/requirements-alerts.txt index 0b9da98ad..ebf1aea0d 100644 --- a/ee/api/requirements-alerts.txt +++ b/ee/api/requirements-alerts.txt @@ -1,22 +1,22 @@ # Keep this version to not have conflicts between requests and boto3 urllib3==1.26.16 requests==2.32.3 -boto3==1.35.72 +boto3==1.35.76 pyjwt==2.10.1 psycopg2-binary==2.9.10 psycopg[pool,binary]==3.2.3 clickhouse-driver[lz4]==0.2.9 -clickhouse-connect==0.8.8 +clickhouse-connect==0.8.9 elasticsearch==8.16.0 jira==3.8.0 cachetools==5.5.0 -fastapi==0.115.5 +fastapi==0.115.6 uvicorn[standard]==0.32.1 python-decouple==3.8 -pydantic[email]==2.10.2 +pydantic[email]==2.10.3 apscheduler==3.11.0 azure-storage-blob==12.23.1 \ No newline at end of file diff --git a/ee/api/requirements-crons.txt b/ee/api/requirements-crons.txt index d283c166a..6aa2370d8 100644 --- a/ee/api/requirements-crons.txt +++ b/ee/api/requirements-crons.txt @@ -1,21 +1,21 @@ # Keep this version to not have conflicts between requests and boto3 urllib3==1.26.16 requests==2.32.3 -boto3==1.35.72 +boto3==1.35.76 pyjwt==2.10.1 psycopg2-binary==2.9.10 psycopg[pool,binary]==3.2.3 clickhouse-driver[lz4]==0.2.9 -clickhouse-connect==0.8.8 +clickhouse-connect==0.8.9 elasticsearch==8.16.0 jira==3.8.0 cachetools==5.5.0 -fastapi==0.115.5 +fastapi==0.115.6 python-decouple==3.8 -pydantic[email]==2.10.2 +pydantic[email]==2.10.3 apscheduler==3.11.0 redis==5.2.0 diff --git a/ee/api/requirements.txt b/ee/api/requirements.txt index c8b02843f..0c0b2902a 100644 --- a/ee/api/requirements.txt +++ b/ee/api/requirements.txt @@ -1,23 +1,23 @@ # Keep this version to not have conflicts between requests and boto3 urllib3==1.26.16 requests==2.32.3 -boto3==1.35.72 +boto3==1.35.76 pyjwt==2.10.1 psycopg2-binary==2.9.10 psycopg[pool,binary]==3.2.3 clickhouse-driver[lz4]==0.2.9 -clickhouse-connect==0.8.8 +clickhouse-connect==0.8.9 elasticsearch==8.16.0 jira==3.8.0 cachetools==5.5.0 -fastapi==0.115.5 +fastapi==0.115.6 uvicorn[standard]==0.32.1 gunicorn==23.0.0 python-decouple==3.8 -pydantic[email]==2.10.2 +pydantic[email]==2.10.3 apscheduler==3.11.0 # TODO: enable after xmlsec fix https://github.com/xmlsec/python-xmlsec/issues/252 diff --git a/ee/api/schemas/schemas_ee.py b/ee/api/schemas/schemas_ee.py index 1d2197895..a5cc5c78f 100644 --- a/ee/api/schemas/schemas_ee.py +++ b/ee/api/schemas/schemas_ee.py @@ -61,20 +61,6 @@ class SignalsSchema(BaseModel): data: dict = Field(default={}) -class InsightCategories(str, Enum): - ERRORS = "errors" - NETWORK = "network" - RAGE = "rage" - RESOURCES = "resources" - - -class GetInsightsSchema(schemas._TimedSchema): - startTimestamp: int = Field(default=TimeUTC.now(-7)) - endTimestamp: int = Field(default=TimeUTC.now()) - metricValue: List[InsightCategories] = Field(default=[]) - series: List[schemas.CardSeriesSchema] = Field(default=[]) - - class CreateMemberSchema(schemas.CreateMemberSchema): roleId: Optional[int] = Field(default=None) @@ -150,15 +136,3 @@ class AssistRecordSearchPayloadSchema(schemas._PaginatedSchema, schemas._TimedSc user_id: Optional[int] = Field(default=None) query: Optional[str] = Field(default=None) order: Literal["asc", "desc"] = Field(default="desc") - - -# TODO: move these to schema when Insights is supported on PG -class CardInsights(schemas.CardInsights): - metric_value: List[InsightCategories] = Field(default=[]) - - @model_validator(mode="after") - def restrictions(self): - return self - - -CardSchema = ORUnion(Union[schemas.__cards_union_base, CardInsights], discriminator='metric_type') diff --git a/ee/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql b/ee/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql index eee54de4e..28dd9ed7f 100644 --- a/ee/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql +++ b/ee/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql @@ -19,6 +19,10 @@ $fn_def$, :'next_version') -- +DELETE +FROM public.metrics +WHERE metrics.metric_type = 'insights'; + COMMIT; \elif :is_next diff --git a/scripts/docker-compose/alerts.env b/scripts/docker-compose/alerts.env index 7eec32512..040b461fe 100644 --- a/scripts/docker-compose/alerts.env +++ b/scripts/docker-compose/alerts.env @@ -12,6 +12,7 @@ S3_SECRET="${COMMON_S3_SECRET}" SITE_URL="${COMMON_PROTOCOL}://${COMMON_DOMAIN_NAME}" ch_host="clickhouse" ch_port="9000" +ch_port_http="8123" ch_username="default" js_cache_bucket=sessions-assets jwt_secret="${COMMON_JWT_SECRET}" diff --git a/scripts/docker-compose/chalice.env b/scripts/docker-compose/chalice.env index 5206e91f2..b6607734e 100644 --- a/scripts/docker-compose/chalice.env +++ b/scripts/docker-compose/chalice.env @@ -12,6 +12,7 @@ S3_SECRET="${COMMON_S3_SECRET}" SITE_URL="${COMMON_PROTOCOL}://${COMMON_DOMAIN_NAME}" ch_host="clickhouse" ch_port="9000" +ch_port_http="8123" ch_username="default" js_cache_bucket=sessions-assets jwt_secret="${COMMON_JWT_SECRET}" diff --git a/scripts/helmcharts/openreplay/charts/alerts/templates/deployment.yaml b/scripts/helmcharts/openreplay/charts/alerts/templates/deployment.yaml index bc09fe218..2fd0f57ff 100644 --- a/scripts/helmcharts/openreplay/charts/alerts/templates/deployment.yaml +++ b/scripts/helmcharts/openreplay/charts/alerts/templates/deployment.yaml @@ -55,6 +55,8 @@ spec: value: "{{ .Values.global.clickhouse.chHost }}" - name: ch_port value: "{{ .Values.global.clickhouse.service.webPort }}" + - name: ch_port_http + value: "{{ .Values.global.clickhouse.service.dataPort }}" - name: ch_username value: '{{ .Values.global.clickhouse.username }}' - name: ch_password diff --git a/scripts/helmcharts/openreplay/charts/chalice/templates/deployment.yaml b/scripts/helmcharts/openreplay/charts/chalice/templates/deployment.yaml index f5f1183e1..b70cfe1c9 100644 --- a/scripts/helmcharts/openreplay/charts/chalice/templates/deployment.yaml +++ b/scripts/helmcharts/openreplay/charts/chalice/templates/deployment.yaml @@ -54,6 +54,8 @@ spec: value: "{{ .Values.global.clickhouse.chHost }}" - name: ch_port value: "{{ .Values.global.clickhouse.service.webPort }}" + - name: ch_port_http + value: "{{ .Values.global.clickhouse.service.dataPort }}" - name: sourcemaps_reader value: "http://sourcemapreader-openreplay.{{.Release.Namespace}}.{{.Values.global.clusterDomain}}:9000/%s/sourcemaps" - name: ASSIST_URL diff --git a/scripts/helmcharts/openreplay/charts/utilities/values.yaml b/scripts/helmcharts/openreplay/charts/utilities/values.yaml index 25da66396..d0c1ecd6d 100644 --- a/scripts/helmcharts/openreplay/charts/utilities/values.yaml +++ b/scripts/helmcharts/openreplay/charts/utilities/values.yaml @@ -89,6 +89,7 @@ apiCrons: chalice: env: ch_port: 9000 + ch_port_http: 8123 captcha_server: '' captcha_key: '' async_Token: '' diff --git a/scripts/helmcharts/openreplay/templates/job.yaml b/scripts/helmcharts/openreplay/templates/job.yaml index 46a25f891..616b293a6 100644 --- a/scripts/helmcharts/openreplay/templates/job.yaml +++ b/scripts/helmcharts/openreplay/templates/job.yaml @@ -213,6 +213,8 @@ spec: value: "{{.Values.global.clickhouse.chHost}}" - name: CH_PORT value: "{{.Values.global.clickhouse.service.webPort}}" + - name: CH_PORT_HTTP + value: "{{.Values.global.clickhouse.service.dataPort}}" - name: CH_USERNAME value: "{{.Values.global.clickhouse.username}}" - name: CH_PASSWORD @@ -458,6 +460,8 @@ spec: value: "{{.Values.global.clickhouse.chHost}}" - name: CH_PORT value: "{{.Values.global.clickhouse.service.webPort}}" + - name: CH_PORT_HTTP + value: "{{.Values.global.clickhouse.service.dataPort}}" - name: CH_USERNAME value: "{{.Values.global.clickhouse.username}}" - name: CH_PASSWORD diff --git a/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql b/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql index b7fc24635..d3b872a43 100644 --- a/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql +++ b/scripts/schema/db/init_dbs/postgresql/1.22.0/1.22.0.sql @@ -19,6 +19,10 @@ $fn_def$, :'next_version') -- +DELETE +FROM public.metrics +WHERE metrics.metric_type = 'insights'; + COMMIT; \elif :is_next