feat(chalice): support data-type for sessions search

This commit is contained in:
Taha Yassine Kraiem 2025-05-13 17:36:16 +02:00 committed by Kraiem Taha Yassine
parent d378b00bf7
commit c4ad390b3f
2 changed files with 20 additions and 28 deletions

View file

@ -6,7 +6,7 @@ from chalicelib.core import events, metadata
from . import performance_event, sessions_legacy from . import performance_event, sessions_legacy
from chalicelib.utils import pg_client, helper, metrics_helper, ch_client, exp_ch_helper from chalicelib.utils import pg_client, helper, metrics_helper, ch_client, exp_ch_helper
from chalicelib.utils import sql_helper as sh from chalicelib.utils import sql_helper as sh
from chalicelib.utils.exp_ch_helper import get_sub_condition from chalicelib.utils.exp_ch_helper import get_sub_condition, get_col_cast
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -1264,14 +1264,15 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu
for l, property in enumerate(event.properties.filters): for l, property in enumerate(event.properties.filters):
a_k = f"{e_k}_att_{l}" a_k = f"{e_k}_att_{l}"
full_args = {**full_args, full_args = {**full_args,
**sh.multi_values(property.value, value_key=a_k)} **sh.multi_values(property.value, value_key=a_k, data_type=property.data_type)}
cast = get_col_cast(data_type=property.data_type, value=property.value)
if property.is_predefined: if property.is_predefined:
condition = get_sub_condition(col_name=f"main.{property.name}", condition = get_sub_condition(col_name=f"accurateCastOrNull(main.`{property.name}`,'{cast}')",
val_name=a_k, operator=property.operator) val_name=a_k, operator=property.operator)
else: else:
condition = get_sub_condition(col_name=f"main.properties.{property.name}", condition = get_sub_condition(
val_name=a_k, operator=property.operator) col_name=f"accurateCastOrNull(main.properties.`{property.name}`,'{cast}')",
val_name=a_k, operator=property.operator)
event_where.append( event_where.append(
sh.multi_conditions(condition, property.value, value_key=a_k) sh.multi_conditions(condition, property.value, value_key=a_k)
) )
@ -1505,7 +1506,7 @@ def search_query_parts_ch(data: schemas.SessionsSearchPayloadSchema, error_statu
if c_f.type == schemas.FetchFilterType.FETCH_URL.value: if c_f.type == schemas.FetchFilterType.FETCH_URL.value:
_extra_or_condition.append( _extra_or_condition.append(
sh.multi_conditions(f"extra_event.url_path {op} %({e_k})s", sh.multi_conditions(f"extra_event.url_path {op} %({e_k})s",
c_f.value, value_key=e_k)) c_f.value, value_key=e_k))
else: else:
logging.warning(f"unsupported extra_event type:${c.type}") logging.warning(f"unsupported extra_event type:${c.type}")
if len(_extra_or_condition) > 0: if len(_extra_or_condition) > 0:
@ -1577,18 +1578,15 @@ def get_user_sessions(project_id, user_id, start_date, end_date):
def get_session_user(project_id, user_id): def get_session_user(project_id, user_id):
with pg_client.PostgresClient() as cur: with pg_client.PostgresClient() as cur:
query = cur.mogrify( query = cur.mogrify(
"""\ """ \
SELECT SELECT user_id,
user_id, count(*) as session_count,
count(*) as session_count, max(start_ts) as last_seen,
max(start_ts) as last_seen, min(start_ts) as first_seen
min(start_ts) as first_seen FROM "public".sessions
FROM WHERE project_id = %(project_id)s
"public".sessions AND user_id = %(userId)s
WHERE AND duration is not null
project_id = %(project_id)s
AND user_id = %(userId)s
AND duration is not null
GROUP BY user_id; GROUP BY user_id;
""", """,
{"project_id": project_id, "userId": user_id} {"project_id": project_id, "userId": user_id}

View file

@ -171,6 +171,8 @@ def get_sub_condition(col_name: str, val_name: str,
def get_col_cast(data_type: schemas.PropertyType, value: Any) -> str:
    """Return the ClickHouse type name a property column should be cast to.

    :param data_type: declared type of the property being filtered on.
    :param value: the filter value, either a single value or a list of
        values; when a list is given, its first element is used as the
        representative for choosing a numeric type.
    :return: ``""`` when *value* is ``None`` or empty; for INT/FLOAT
        properties, whatever ``best_clickhouse_type`` picks for the
        representative value; otherwise ``data_type.capitalize()``.
    """
    if value is None:
        return ""
    # Only sized values (lists, strings, ...) can be "empty". Scalars such as
    # int/float have no length, and calling len() on them raises TypeError.
    if hasattr(value, "__len__") and len(value) == 0:
        return ""
    if isinstance(value, list):
        # Use the first value as the representative of the whole list.
        value = value[0]
    if data_type in (schemas.PropertyType.INT, schemas.PropertyType.FLOAT):
        return best_clickhouse_type(value)
    # NOTE(review): presumably PropertyType is a str-valued enum, so this
    # yields e.g. 'string' -> 'String'; confirm every member maps to a valid
    # ClickHouse type name.
    return data_type.capitalize()
@ -193,14 +195,6 @@ def best_clickhouse_type(value):
""" """
Return the most compact ClickHouse numeric type that can store *value* loss-lessly. Return the most compact ClickHouse numeric type that can store *value* loss-lessly.
>>> best_clickhouse_type(42)
'UInt8'
>>> best_clickhouse_type(-42)
'Int8'
>>> best_clickhouse_type(1.5)
'Float32'
>>> best_clickhouse_type(1e308)
'Float64'
""" """
# Treat bool like tiny int # Treat bool like tiny int
if isinstance(value, bool): if isinstance(value, bool):
@ -212,7 +206,7 @@ def best_clickhouse_type(value):
if lo <= value <= hi: if lo <= value <= hi:
return name return name
# Beyond UInt64: ClickHouse offers Int128 / Int256 or Decimal # Beyond UInt64: ClickHouse offers Int128 / Int256 or Decimal
return "Int128 (or Decimal)" return "Int128"
# --- Decimal.Decimal (exact) --- # --- Decimal.Decimal (exact) ---
if isinstance(value, Decimal): if isinstance(value, Decimal):