From c0bb05bc0f4ab5205482651a540cd67012cda776 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Fri, 28 Mar 2025 16:50:09 +0100 Subject: [PATCH] feat(chalice): support regex operator for sessions search --- .../core/product_analytics/events.py | 20 +++------- api/chalicelib/core/sessions/sessions_ch.py | 40 +++++++++++++------ api/chalicelib/utils/exp_ch_helper.py | 10 +++++ api/chalicelib/utils/sql_helper.py | 4 +- 4 files changed, 47 insertions(+), 27 deletions(-) diff --git a/api/chalicelib/core/product_analytics/events.py b/api/chalicelib/core/product_analytics/events.py index e645618bd..895f2e407 100644 --- a/api/chalicelib/core/product_analytics/events.py +++ b/api/chalicelib/core/product_analytics/events.py @@ -1,10 +1,10 @@ import logging -from typing import Union + import schemas from chalicelib.utils import helper from chalicelib.utils import sql_helper as sh from chalicelib.utils.ch_client import ClickHouseClient -from schemas import SearchEventOperator +from chalicelib.utils.exp_ch_helper import get_sub_condition logger = logging.getLogger(__name__) @@ -32,14 +32,6 @@ def get_events(project_id: int, page: schemas.PaginatedSchema): return {"total": total, "list": helper.list_to_camel_case(rows)} -def __get_sub_condition(col_name: str, val_name: str, - operator: Union[schemas.SearchEventOperator, schemas.MathOperator]): - if operator == SearchEventOperator.PATTERN: - return f"match({col_name}, %({val_name})s)" - op = sh.get_sql_operator(operator) - return f"{col_name} {op} %({val_name})s" - - def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema): with ClickHouseClient() as ch_client: full_args = {"project_id": project_id, "startDate": data.startTimestamp, "endDate": data.endTimestamp, @@ -68,7 +60,7 @@ def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema): condition = f"empty({column})" else: condition = sh.multi_conditions( - __get_sub_condition(col_name=column, val_name=f_k, operator=f.operator), + get_sub_condition(col_name=column, val_name=f_k, operator=f.operator), values=f.value, value_key=f_k) constraints.append(condition) @@ -81,10 +73,10 @@ def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema): p_k = f"e_{i}_p_{j}" full_args = {**full_args, **sh.multi_values(ef.value, value_key=p_k)} if ef.is_predefined: - sub_condition = __get_sub_condition(col_name=ef.name, val_name=p_k, operator=ef.operator) + sub_condition = get_sub_condition(col_name=ef.name, val_name=p_k, operator=ef.operator) else: - sub_condition = __get_sub_condition(col_name=f"properties.{ef.name}", - val_name=p_k, operator=ef.operator) + sub_condition = get_sub_condition(col_name=f"properties.{ef.name}", + val_name=p_k, operator=ef.operator) sub_conditions.append(sh.multi_conditions(sub_condition, ef.value, value_key=p_k)) if len(sub_conditions) > 0: condition += " AND (" + (" " + f.properties.operator + " ").join(sub_conditions) + ")" diff --git a/api/chalicelib/core/sessions/sessions_ch.py b/api/chalicelib/core/sessions/sessions_ch.py index 32849d2ce..7ba259b80 100644 --- a/api/chalicelib/core/sessions/sessions_ch.py +++ b/api/chalicelib/core/sessions/sessions_ch.py @@ -6,6 +6,7 @@ from chalicelib.core import events, metadata from . import performance_event, sessions_legacy from chalicelib.utils import pg_client, helper, metrics_helper, ch_client, exp_ch_helper from chalicelib.utils import sql_helper as sh +from chalicelib.utils.exp_ch_helper import get_sub_condition logger = logging.getLogger(__name__) @@ -330,7 +331,11 @@ def json_condition(table_alias, json_column, json_key, op, values, value_key, ch extract_func = "JSONExtractFloat" if numeric_type == "float" else "JSONExtractInt" condition = f"{extract_func}(toString({table_alias}.`{json_column}`), '{json_key}') {op} %({value_key})s" else: - condition = f"JSONExtractString(toString({table_alias}.`{json_column}`), '{json_key}') {op} %({value_key})s" + # condition = f"JSONExtractString(toString({table_alias}.`{json_column}`), '{json_key}') {op} %({value_key})s" + condition = get_sub_condition( + col_name=f"JSONExtractString(toString({table_alias}.`{json_column}`), '{json_key}')", + val_name=value_key, operator=op + ) conditions.append(sh.multi_conditions(condition, values, value_key=value_key)) @@ -678,8 +683,10 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu # "selector", op, event.value, e_k) # ) event_where.append( - sh.multi_conditions(f"main.`$properties`.selector {op} %({e_k})s", - event.value, value_key=e_k) + sh.multi_conditions( + get_sub_condition(col_name=f"main.`$properties`.selector", + val_name=e_k, operator=event.operator), + event.value, value_key=e_k) ) events_conditions[-1]["condition"] = event_where[-1] else: @@ -688,20 +695,26 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu # "sub", "$properties", _column, op, event.value, e_k # )) event_where.append( - sh.multi_conditions(f"sub.`$properties`.{_column} {op} %({e_k})s", - event.value, value_key=e_k) + sh.multi_conditions( + get_sub_condition(col_name=f"sub.`$properties`.{_column}", + val_name=e_k, operator=event.operator), + event.value, value_key=e_k) ) events_conditions_not.append( { - "type": f"sub.`$event_name`='{exp_ch_helper.get_event_type(event_type, platform=platform)}'"}) + "type": f"sub.`$event_name`='{exp_ch_helper.get_event_type(event_type, platform=platform)}'" + } + ) events_conditions_not[-1]["condition"] = event_where[-1] else: # event_where.append( # json_condition("main", "$properties", _column, op, event.value, e_k) # ) event_where.append( - sh.multi_conditions(f"main.`$properties`.{_column} {op} %({e_k})s", - event.value, value_key=e_k) + sh.multi_conditions( + get_sub_condition(col_name=f"main.`$properties`.{_column}", + val_name=e_k, operator=event.operator), + event.value, value_key=e_k) ) events_conditions[-1]["condition"] = event_where[-1] else: @@ -1210,7 +1223,7 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu elif event_type == schemas.EventType.EVENT: event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main " _column = events.EventType.CLICK.column - event_where.append(f"main.`$event_name`=%({e_k})s AND notEmpty(main.session_id)") + event_where.append(f"main.`$event_name`=%({e_k})s AND main.session_id>0") events_conditions.append({"type": event_where[-1], "condition": ""}) else: @@ -1221,10 +1234,13 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu a_k = f"{e_k}_att_{l}" full_args = {**full_args, **sh.multi_values(property.value, value_key=a_k)} - op = sh.get_sql_operator(property.operator) - condition = f"main.properties.{property.name} {op} %({a_k})s" + if property.is_predefined: - condition = f"main.{property.name} {op} %({a_k})s" + condition = get_sub_condition(col_name=f"main.{property.name}", + val_name=a_k, operator=property.operator) + else: + condition = get_sub_condition(col_name=f"main.properties.{property.name}", + val_name=a_k, operator=property.operator) event_where.append( sh.multi_conditions(condition, property.value, value_key=a_k) ) diff --git a/api/chalicelib/utils/exp_ch_helper.py b/api/chalicelib/utils/exp_ch_helper.py index 3b2ff7f2d..b2c061533 100644 --- a/api/chalicelib/utils/exp_ch_helper.py +++ b/api/chalicelib/utils/exp_ch_helper.py @@ -3,6 +3,8 @@ import re from typing import Union import schemas +from chalicelib.utils import sql_helper as sh +from schemas import SearchEventOperator logger = logging.getLogger(__name__) @@ -150,3 +152,11 @@ def simplify_clickhouse_types(ch_types: list[str]) -> list[str]: by calling `simplify_clickhouse_type` on each. """ return list(set([simplify_clickhouse_type(t) for t in ch_types])) + + +def get_sub_condition(col_name: str, val_name: str, + operator: Union[schemas.SearchEventOperator, schemas.MathOperator]): + if operator == SearchEventOperator.PATTERN: + return f"match({col_name}, %({val_name})s)" + op = sh.get_sql_operator(operator) + return f"{col_name} {op} %({val_name})s" diff --git a/api/chalicelib/utils/sql_helper.py b/api/chalicelib/utils/sql_helper.py index 1de16c70f..521050634 100644 --- a/api/chalicelib/utils/sql_helper.py +++ b/api/chalicelib/utils/sql_helper.py @@ -14,6 +14,9 @@ def get_sql_operator(op: Union[schemas.SearchEventOperator, schemas.ClickEventEx schemas.SearchEventOperator.NOT_CONTAINS: "NOT ILIKE", schemas.SearchEventOperator.STARTS_WITH: "ILIKE", schemas.SearchEventOperator.ENDS_WITH: "ILIKE", + # this is not used as an operator, it is used in order to maintain a valid value for conditions + schemas.SearchEventOperator.PATTERN: "regex", + # Selector operators: schemas.ClickEventExtraOperator.IS: "=", schemas.ClickEventExtraOperator.IS_NOT: "!=", @@ -72,4 +75,3 @@ def single_value(values): if isinstance(v, Enum): values[i] = v.value return values -