feat(chalice): support data type for events search

Taha Yassine Kraiem 2025-05-13 14:59:46 +02:00
parent ea1c8bfd3b
commit f8dca53073
4 changed files with 99 additions and 7 deletions

View file

@@ -4,7 +4,7 @@ import schemas
 from chalicelib.utils import helper
 from chalicelib.utils import sql_helper as sh
 from chalicelib.utils.ch_client import ClickHouseClient
-from chalicelib.utils.exp_ch_helper import get_sub_condition
+from chalicelib.utils.exp_ch_helper import get_sub_condition, get_col_cast

 logger = logging.getLogger(__name__)

 PREDEFINED_EVENTS = {
@@ -111,11 +111,13 @@ def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema):
         sub_conditions = []
         for j, ef in enumerate(f.properties.filters):
             p_k = f"e_{i}_p_{j}"
-            full_args = {**full_args, **sh.multi_values(ef.value, value_key=p_k)}
+            full_args = {**full_args, **sh.multi_values(ef.value, value_key=p_k, data_type=ef.data_type)}
+            cast = get_col_cast(data_type=ef.data_type, value=ef.value)
             if ef.is_predefined:
-                sub_condition = get_sub_condition(col_name=ef.name, val_name=p_k, operator=ef.operator)
+                sub_condition = get_sub_condition(col_name=f"accurateCastOrNull(`{ef.name}`,'{cast}')",
+                                                  val_name=p_k, operator=ef.operator)
             else:
-                sub_condition = get_sub_condition(col_name=f"properties.{ef.name}",
+                sub_condition = get_sub_condition(col_name=f"accurateCastOrNull(properties.`{ef.name}`,'{cast}')",
                                                   val_name=p_k, operator=ef.operator)
             sub_conditions.append(sh.multi_conditions(sub_condition, ef.value, value_key=p_k))
         if len(sub_conditions) > 0:
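
For orientation, here is a self-contained sketch of the predicate this loop now builds for one hypothetical custom (non-predefined) property filter. The property name, value, parameter key and the equality operator below are invented for illustration; the real composition goes through get_col_cast, get_sub_condition, sh.multi_values and sh.multi_conditions:

# Hypothetical illustration only: a custom (non-predefined) property "cart_total",
# data_type "int", value [42], equality operator, parameter key p_k == "e_0_p_0".
cast = "Int8"  # what get_col_cast / best_clickhouse_type would pick for 42
col = f"accurateCastOrNull(properties.`cart_total`,'{cast}')"
sub_condition = f"{col} = %(e_0_p_0_0)s"  # shape produced by get_sub_condition + sh.multi_conditions
full_args = {"e_0_p_0_0": 42}             # bound parameters from sh.multi_values
print(sub_condition)
# accurateCastOrNull(properties.`cart_total`,'Int8') = %(e_0_p_0_0)s

The point of accurateCastOrNull is that a property stored as a string is compared numerically when the filter declares an int/float data_type, and rows whose stored value cannot be cast yield NULL instead of failing the whole query.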

View file

@@ -1,10 +1,13 @@
 import logging
 import re
-from typing import Union
+from typing import Union, Any

 import schemas
 from chalicelib.utils import sql_helper as sh
 from schemas import SearchEventOperator
+import math
+import struct
+from decimal import Decimal

 logger = logging.getLogger(__name__)
@@ -158,8 +161,79 @@ def simplify_clickhouse_types(ch_types: list[str]) -> list[str]:

 def get_sub_condition(col_name: str, val_name: str,
-                      operator: Union[schemas.SearchEventOperator, schemas.MathOperator]):
+                      operator: Union[schemas.SearchEventOperator, schemas.MathOperator]) -> str:
     if operator == SearchEventOperator.PATTERN:
         return f"match({col_name}, %({val_name})s)"
     op = sh.get_sql_operator(operator)
     return f"{col_name} {op} %({val_name})s"
+
+
+def get_col_cast(data_type: schemas.PropertyType, value: Any) -> str:
+    if value is None or len(value) == 0:
+        return ""
+    if data_type in (schemas.PropertyType.INT, schemas.PropertyType.FLOAT):
+        # value is the list of filter values; infer the numeric type from its first element
+        return best_clickhouse_type(value[0])
+    return data_type.capitalize()
+
+
+# (type_name, minimum, maximum) ordered by increasing size
+_INT_RANGES = [
+    ("Int8", -128, 127),
+    ("UInt8", 0, 255),
+    ("Int16", -32_768, 32_767),
+    ("UInt16", 0, 65_535),
+    ("Int32", -2_147_483_648, 2_147_483_647),
+    ("UInt32", 0, 4_294_967_295),
+    ("Int64", -9_223_372_036_854_775_808, 9_223_372_036_854_775_807),
+    ("UInt64", 0, 18_446_744_073_709_551_615),
+]
+
+
+def best_clickhouse_type(value):
+    """
+    Return the most compact ClickHouse numeric type that can store *value* loss-lessly.
+
+    >>> best_clickhouse_type(200)
+    'UInt8'
+    >>> best_clickhouse_type(-42)
+    'Int8'
+    >>> best_clickhouse_type(1.5)
+    'Float32'
+    >>> best_clickhouse_type(1e308)
+    'Float64'
+    """
+    # Treat bool like a tiny int
+    if isinstance(value, bool):
+        value = int(value)
+
+    # --- Integers ---
+    if isinstance(value, int):
+        for name, lo, hi in _INT_RANGES:
+            if lo <= value <= hi:
+                return name
+        # Beyond UInt64: fall back to Int128 (a Decimal type would also work)
+        return "Int128"
+
+    # --- Decimal.Decimal (exact) ---
+    if isinstance(value, Decimal):
+        # ClickHouse Decimal32/64/128 hold 9 / 18 / 38 significant digits.
+        digits = len(value.as_tuple().digits)
+        if digits <= 9:
+            return "Decimal32"
+        elif digits <= 18:
+            return "Decimal64"
+        else:
+            return "Decimal128"
+
+    # --- Floats ---
+    if isinstance(value, float):
+        if not math.isfinite(value):
+            return "Float64"  # inf / nan -> always Float64
+        # Check if a round-trip through 32-bit float preserves the value
+        try:
+            packed = struct.pack("f", value)
+        except OverflowError:  # magnitude too large for Float32
+            return "Float64"
+        if struct.unpack("f", packed)[0] == value:
+            return "Float32"
+        return "Float64"
+
+    raise TypeError(f"Unsupported type: {type(value).__name__}")
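
A quick usage sketch of the two helpers above, assuming this file is chalicelib/utils/exp_ch_helper.py (it defines the helpers the first file imports from there) and that best_clickhouse_type is exported alongside get_col_cast:

from decimal import Decimal

import schemas
from chalicelib.utils.exp_ch_helper import best_clickhouse_type, get_col_cast

print(best_clickhouse_type(42))           # Int8
print(best_clickhouse_type(200))          # UInt8
print(best_clickhouse_type(0.1))          # Float64 (0.1 does not survive a 32-bit round-trip)
print(best_clickhouse_type(Decimal("12345678901")))                        # Decimal64 (11 digits)
print(get_col_cast(data_type=schemas.PropertyType.INT, value=[200]))       # UInt8
print(get_col_cast(data_type=schemas.PropertyType.STRING, value=["foo"]))  # String
print(get_col_cast(data_type=schemas.PropertyType.INT, value=[]))          # '' (no cast for empty values)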

View file

@@ -52,12 +52,16 @@ def multi_conditions(condition, values, value_key="value", is_not=False):
     return "(" + (" AND " if is_not else " OR ").join(query) + ")"


-def multi_values(values, value_key="value"):
+def multi_values(values, value_key="value", data_type: schemas.PropertyType | None = None):
     query_values = {}
     if values is not None and isinstance(values, list):
         for i in range(len(values)):
             k = f"{value_key}_{i}"
             query_values[k] = values[i].value if isinstance(values[i], Enum) else values[i]
+            if data_type:
+                if data_type == schemas.PropertyType.STRING:
+                    query_values[k] = str(query_values[k])
     return query_values
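
A minimal sketch of what the extended multi_values returns; the keys follow the f"{value_key}_{i}" pattern above, and the import path mirrors the `sql_helper as sh` alias used in the first file:

import schemas
from chalicelib.utils import sql_helper as sh

print(sh.multi_values([1, 2], value_key="e_0_p_0"))
# {'e_0_p_0_0': 1, 'e_0_p_0_1': 2}

print(sh.multi_values([1, 2], value_key="e_0_p_0", data_type=schemas.PropertyType.STRING))
# {'e_0_p_0_0': '1', 'e_0_p_0_1': '2'}   <- ints coerced to str for string-typed properties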

View file

@@ -581,11 +581,23 @@ class EventPredefinedPropertyType(str, Enum):
     IMPORT = "$import"


+class PropertyType(str, Enum):
+    INT = "int"
+    FLOAT = "float"
+    DATETIME = "datetime"
+    STRING = "string"
+    ARRAY = "array"
+    TUPLE = "tuple"
+    MAP = "map"
+    NESTED = "nested"
+
+
 class PropertyFilterSchema(BaseModel):
     is_event: Literal[False] = False
     name: Union[EventPredefinedPropertyType, str] = Field(...)
     operator: Union[SearchEventOperator, MathOperator] = Field(...)
     value: List[Union[int, str]] = Field(...)
+    data_type: PropertyType = Field(default=PropertyType.STRING.value)
     # property_type: Optional[Literal["string", "number", "date"]] = Field(default=None)
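
On the request side, a property filter can now carry an explicit data_type and falls back to "string" when it is omitted. A hypothetical JSON fragment for an events-search payload (the property name is invented and the operator value is only a placeholder for a valid SearchEventOperator/MathOperator):

property_filter = {
    "is_event": False,
    "name": "cart_total",   # free-form property name or a predefined $-property
    "operator": "=",        # placeholder; must be a valid SearchEventOperator/MathOperator value
    "value": [100],
    "data_type": "int",     # new field; "string" is assumed when omitted
}

With "data_type": "int" or "float" the column is wrapped in accurateCastOrNull with a numeric type chosen by best_clickhouse_type, while with "string" the bound values are coerced to str by multi_values.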