From d1aac6c357d9a507c8def9332c952485aa9d9584 Mon Sep 17 00:00:00 2001
From: Taha Yassine Kraiem
Date: Wed, 14 May 2025 19:15:09 +0200
Subject: [PATCH] fix(chalice): fix EE

---
 ee/api/chalicelib/utils/exp_ch_helper.py | 103 +++++++++++++++++++++--
 1 file changed, 95 insertions(+), 8 deletions(-)

diff --git a/ee/api/chalicelib/utils/exp_ch_helper.py b/ee/api/chalicelib/utils/exp_ch_helper.py
index 1c26c1b22..c31707894 100644
--- a/ee/api/chalicelib/utils/exp_ch_helper.py
+++ b/ee/api/chalicelib/utils/exp_ch_helper.py
@@ -1,9 +1,16 @@
-from typing import Union
+import logging
+import math
+import re
+import struct
+from decimal import Decimal
+from typing import Any, Union
+
+from decouple import config
 
 import schemas
+from chalicelib.utils import sql_helper as sh
 from chalicelib.utils.TimeUTC import TimeUTC
-from decouple import config
-import logging
+from schemas import SearchEventOperator
 
 logger = logging.getLogger(__name__)
 
@@ -110,12 +117,13 @@ def simplify_clickhouse_type(ch_type: str) -> str:
         return "int"
 
     # Floats: Float32, Float64
-    if re.match(r'^float(32|64)$', normalized_type):
+    if re.match(r'^(float(32|64)|double)$', normalized_type):
         return "float"
 
     # Decimal: Decimal(P, S)
     if normalized_type.startswith("decimal"):
-        return "decimal"
+        # return "decimal"
+        return "float"
 
     # Date/DateTime
     if normalized_type.startswith("date"):
@@ -131,11 +139,13 @@ def simplify_clickhouse_type(ch_type: str) -> str:
 
     # UUID
     if normalized_type.startswith("uuid"):
-        return "uuid"
+        # return "uuid"
+        return "string"
 
     # Enums: Enum8(...) or Enum16(...)
     if normalized_type.startswith("enum8") or normalized_type.startswith("enum16"):
-        return "enum"
+        # return "enum"
+        return "string"
 
     # Arrays: Array(T)
     if normalized_type.startswith("array"):
@@ -166,8 +176,85 @@ def simplify_clickhouse_types(ch_types: list[str]) -> list[str]:
 
 
 def get_sub_condition(col_name: str, val_name: str,
-                      operator: Union[schemas.SearchEventOperator, schemas.MathOperator]):
+                      operator: Union[schemas.SearchEventOperator, schemas.MathOperator]) -> str:
     if operator == SearchEventOperator.PATTERN:
         return f"match({col_name}, %({val_name})s)"
     op = sh.get_sql_operator(operator)
     return f"{col_name} {op} %({val_name})s"
+
+
+def get_col_cast(data_type: schemas.PropertyType, value: Any) -> str:
+    """
+    Return the type name to use when casting a column, based on *data_type* and *value*.
+
+    Returns "" when *value* is None or empty; a list is represented by its first element.
+    INT/FLOAT data types get the narrowest numeric type that fits the value.
+    """
+    # Scalars have no len(); only sized values can be "empty".
+    if value is None or (hasattr(value, "__len__") and len(value) == 0):
+        return ""
+    if isinstance(value, list):
+        value = value[0]
+    if data_type in (schemas.PropertyType.INT, schemas.PropertyType.FLOAT):
+        return best_clickhouse_type(value)
+    return data_type.capitalize()
+
+
+# (type_name, minimum, maximum) – ordered by increasing size
+_INT_RANGES = [
+    ("Int8", -128, 127),
+    ("UInt8", 0, 255),
+    ("Int16", -32_768, 32_767),
+    ("UInt16", 0, 65_535),
+    ("Int32", -2_147_483_648, 2_147_483_647),
+    ("UInt32", 0, 4_294_967_295),
+    ("Int64", -9_223_372_036_854_775_808, 9_223_372_036_854_775_807),
+    ("UInt64", 0, 18_446_744_073_709_551_615),
+]
+
+
+def best_clickhouse_type(value):
+    """
+    Return the most compact ClickHouse numeric type that can store *value* loss-lessly.
+
+    Raises TypeError when *value* is not a bool, int, Decimal or float.
+    """
+    # Treat bool like tiny int
+    if isinstance(value, bool):
+        value = int(value)
+
+    # --- Integers ---
+    if isinstance(value, int):
+        for name, lo, hi in _INT_RANGES:
+            if lo <= value <= hi:
+                return name
+        # Beyond UInt64: ClickHouse offers Int128 / Int256 or Decimal
+        return "Int128"
+
+    # --- Decimal.Decimal (exact) ---
+    if isinstance(value, Decimal):
+        # ClickHouse Decimal32/64/128 have 9 / 18 / 38 significant digits.
+        digits = len(value.as_tuple().digits)
+        if digits <= 9:
+            return "Decimal32"
+        elif digits <= 18:
+            return "Decimal64"
+        else:
+            return "Decimal128"
+
+    # --- Floats ---
+    if isinstance(value, float):
+        if not math.isfinite(value):
+            return "Float64"  # inf / nan → always Float64
+
+        # Check if a round-trip through 32-bit float preserves the bit pattern
+        try:
+            packed = struct.pack("f", value)
+        except OverflowError:
+            # Finite but outside the Float32 range: struct refuses to pack it.
+            return "Float64"
+        if struct.unpack("f", packed)[0] == value:
+            return "Float32"
+        return "Float64"
+
+    raise TypeError(f"Unsupported type: {type(value).__name__}")