feat(chalice): support regex operator for sessions search

This commit is contained in:
Taha Yassine Kraiem 2025-03-28 16:50:09 +01:00 committed by Kraiem Taha Yassine
parent 70258e5c1d
commit c0bb05bc0f
4 changed files with 47 additions and 27 deletions

View file

@ -1,10 +1,10 @@
import logging
from typing import Union
import schemas
from chalicelib.utils import helper
from chalicelib.utils import sql_helper as sh
from chalicelib.utils.ch_client import ClickHouseClient
from schemas import SearchEventOperator
from chalicelib.utils.exp_ch_helper import get_sub_condition
logger = logging.getLogger(__name__)
@ -32,14 +32,6 @@ def get_events(project_id: int, page: schemas.PaginatedSchema):
return {"total": total, "list": helper.list_to_camel_case(rows)}
def __get_sub_condition(col_name: str, val_name: str,
                        operator: Union[schemas.SearchEventOperator, schemas.MathOperator]):
    """Render a single SQL condition between a column and a named parameter.

    :param col_name: column expression, inserted into the SQL text as-is.
    :param val_name: name of the query parameter; rendered as the
        ``%(val_name)s`` placeholder, never as an inline value.
    :param operator: search/math operator selecting the comparison.
    :return: the condition as a SQL string.
    """
    placeholder = f"%({val_name})s"
    if operator == SearchEventOperator.PATTERN:
        # PATTERN is rendered as a match(column, value) call rather than
        # as an infix operator between the column and the placeholder.
        condition = f"match({col_name}, {placeholder})"
    else:
        sql_operator = sh.get_sql_operator(operator)
        condition = f"{col_name} {sql_operator} {placeholder}"
    return condition
def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema):
with ClickHouseClient() as ch_client:
full_args = {"project_id": project_id, "startDate": data.startTimestamp, "endDate": data.endTimestamp,
@ -68,7 +60,7 @@ def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema):
condition = f"empty({column})"
else:
condition = sh.multi_conditions(
__get_sub_condition(col_name=column, val_name=f_k, operator=f.operator),
get_sub_condition(col_name=column, val_name=f_k, operator=f.operator),
values=f.value, value_key=f_k)
constraints.append(condition)
@ -81,10 +73,10 @@ def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema):
p_k = f"e_{i}_p_{j}"
full_args = {**full_args, **sh.multi_values(ef.value, value_key=p_k)}
if ef.is_predefined:
sub_condition = __get_sub_condition(col_name=ef.name, val_name=p_k, operator=ef.operator)
sub_condition = get_sub_condition(col_name=ef.name, val_name=p_k, operator=ef.operator)
else:
sub_condition = __get_sub_condition(col_name=f"properties.{ef.name}",
val_name=p_k, operator=ef.operator)
sub_condition = get_sub_condition(col_name=f"properties.{ef.name}",
val_name=p_k, operator=ef.operator)
sub_conditions.append(sh.multi_conditions(sub_condition, ef.value, value_key=p_k))
if len(sub_conditions) > 0:
condition += " AND (" + (" " + f.properties.operator + " ").join(sub_conditions) + ")"

View file

@ -6,6 +6,7 @@ from chalicelib.core import events, metadata
from . import performance_event, sessions_legacy
from chalicelib.utils import pg_client, helper, metrics_helper, ch_client, exp_ch_helper
from chalicelib.utils import sql_helper as sh
from chalicelib.utils.exp_ch_helper import get_sub_condition
logger = logging.getLogger(__name__)
@ -330,7 +331,11 @@ def json_condition(table_alias, json_column, json_key, op, values, value_key, ch
extract_func = "JSONExtractFloat" if numeric_type == "float" else "JSONExtractInt"
condition = f"{extract_func}(toString({table_alias}.`{json_column}`), '{json_key}') {op} %({value_key})s"
else:
condition = f"JSONExtractString(toString({table_alias}.`{json_column}`), '{json_key}') {op} %({value_key})s"
# condition = f"JSONExtractString(toString({table_alias}.`{json_column}`), '{json_key}') {op} %({value_key})s"
condition = get_sub_condition(
col_name=f"JSONExtractString(toString({table_alias}.`{json_column}`), '{json_key}')",
val_name=value_key, operator=op
)
conditions.append(sh.multi_conditions(condition, values, value_key=value_key))
@ -678,8 +683,10 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu
# "selector", op, event.value, e_k)
# )
event_where.append(
sh.multi_conditions(f"main.`$properties`.selector {op} %({e_k})s",
event.value, value_key=e_k)
sh.multi_conditions(
get_sub_condition(col_name=f"main.`$properties`.selector",
val_name=e_k, operator=event.operator),
event.value, value_key=e_k)
)
events_conditions[-1]["condition"] = event_where[-1]
else:
@ -688,20 +695,26 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu
# "sub", "$properties", _column, op, event.value, e_k
# ))
event_where.append(
sh.multi_conditions(f"sub.`$properties`.{_column} {op} %({e_k})s",
event.value, value_key=e_k)
sh.multi_conditions(
get_sub_condition(col_name=f"sub.`$properties`.{_column}",
val_name=e_k, operator=event.operator),
event.value, value_key=e_k)
)
events_conditions_not.append(
{
"type": f"sub.`$event_name`='{exp_ch_helper.get_event_type(event_type, platform=platform)}'"})
"type": f"sub.`$event_name`='{exp_ch_helper.get_event_type(event_type, platform=platform)}'"
}
)
events_conditions_not[-1]["condition"] = event_where[-1]
else:
# event_where.append(
# json_condition("main", "$properties", _column, op, event.value, e_k)
# )
event_where.append(
sh.multi_conditions(f"main.`$properties`.{_column} {op} %({e_k})s",
event.value, value_key=e_k)
sh.multi_conditions(
get_sub_condition(col_name=f"main.`$properties`.{_column}",
val_name=e_k, operator=event.operator),
event.value, value_key=e_k)
)
events_conditions[-1]["condition"] = event_where[-1]
else:
@ -1210,7 +1223,7 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu
elif event_type == schemas.EventType.EVENT:
event_from = event_from % f"{MAIN_EVENTS_TABLE} AS main "
_column = events.EventType.CLICK.column
event_where.append(f"main.`$event_name`=%({e_k})s AND notEmpty(main.session_id)")
event_where.append(f"main.`$event_name`=%({e_k})s AND main.session_id>0")
events_conditions.append({"type": event_where[-1], "condition": ""})
else:
@ -1221,10 +1234,13 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu
a_k = f"{e_k}_att_{l}"
full_args = {**full_args,
**sh.multi_values(property.value, value_key=a_k)}
op = sh.get_sql_operator(property.operator)
condition = f"main.properties.{property.name} {op} %({a_k})s"
if property.is_predefined:
condition = f"main.{property.name} {op} %({a_k})s"
condition = get_sub_condition(col_name=f"main.{property.name}",
val_name=a_k, operator=property.operator)
else:
condition = get_sub_condition(col_name=f"main.properties.{property.name}",
val_name=a_k, operator=property.operator)
event_where.append(
sh.multi_conditions(condition, property.value, value_key=a_k)
)

View file

@ -3,6 +3,8 @@ import re
from typing import Union
import schemas
from chalicelib.utils import sql_helper as sh
from schemas import SearchEventOperator
logger = logging.getLogger(__name__)
@ -150,3 +152,11 @@ def simplify_clickhouse_types(ch_types: list[str]) -> list[str]:
by calling `simplify_clickhouse_type` on each.
"""
return list(set([simplify_clickhouse_type(t) for t in ch_types]))
def get_sub_condition(col_name: str, val_name: str,
                      operator: Union[schemas.SearchEventOperator, schemas.MathOperator]):
    """Build one SQL condition comparing a column with a named query parameter.

    The value is never inlined: it is referenced through the
    ``%(val_name)s`` placeholder, while ``col_name`` is inserted into the
    SQL text verbatim.

    :param col_name: column (or column expression) on the left-hand side.
    :param val_name: name of the query parameter holding the value.
    :param operator: search/math operator selecting the comparison.
    :return: the condition as a SQL string.
    """
    value_ref = f"%({val_name})s"
    if operator == SearchEventOperator.PATTERN:
        # PATTERN has no infix form here; it is emitted as a
        # match(column, value) function call instead.
        return f"match({col_name}, {value_ref})"
    # Every other operator is translated to its SQL spelling and placed
    # between the column and the placeholder.
    return " ".join((col_name, sh.get_sql_operator(operator), value_ref))

View file

@ -14,6 +14,9 @@ def get_sql_operator(op: Union[schemas.SearchEventOperator, schemas.ClickEventEx
schemas.SearchEventOperator.NOT_CONTAINS: "NOT ILIKE",
schemas.SearchEventOperator.STARTS_WITH: "ILIKE",
schemas.SearchEventOperator.ENDS_WITH: "ILIKE",
# This entry is not used as an operator; it only keeps a valid value available for conditions.
schemas.SearchEventOperator.PATTERN: "regex",
# Selector operators:
schemas.ClickEventExtraOperator.IS: "=",
schemas.ClickEventExtraOperator.IS_NOT: "!=",
@ -72,4 +75,3 @@ def single_value(values):
if isinstance(v, Enum):
values[i] = v.value
return values