From 70258e5c1d150a04c90a922310d46afe3ee2dd08 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Fri, 28 Mar 2025 16:00:27 +0100 Subject: [PATCH] refactor(chalice): simplified supportedTypes for product analytics --- .../core/product_analytics/events.py | 4 +- .../core/product_analytics/properties.py | 6 +- api/chalicelib/utils/exp_ch_helper.py | 86 ++++++++++++++++++- 3 files changed, 91 insertions(+), 5 deletions(-) diff --git a/api/chalicelib/core/product_analytics/events.py b/api/chalicelib/core/product_analytics/events.py index affccec7a..e645618bd 100644 --- a/api/chalicelib/core/product_analytics/events.py +++ b/api/chalicelib/core/product_analytics/events.py @@ -27,7 +27,7 @@ def get_events(project_id: int, page: schemas.PaginatedSchema): for i, row in enumerate(rows): row["id"] = f"event_{i}" row["icon"] = None - row["possibleTypes"] = ["String"] + row["possibleTypes"] = ["string"] row.pop("total") return {"total": total, "list": helper.list_to_camel_case(rows)} @@ -141,6 +141,6 @@ def get_lexicon(project_id: int, page: schemas.PaginatedSchema): for i, row in enumerate(rows): row["id"] = f"event_{i}" row["icon"] = None - row["possibleTypes"] = ["String"] + row["possibleTypes"] = ["string"] row.pop("total") return {"total": total, "list": helper.list_to_camel_case(rows)} diff --git a/api/chalicelib/core/product_analytics/properties.py b/api/chalicelib/core/product_analytics/properties.py index e88f51de1..dcdfc954a 100644 --- a/api/chalicelib/core/product_analytics/properties.py +++ b/api/chalicelib/core/product_analytics/properties.py @@ -1,4 +1,4 @@ -from chalicelib.utils import helper +from chalicelib.utils import helper, exp_ch_helper from chalicelib.utils.ch_client import ClickHouseClient import schemas @@ -22,11 +22,13 @@ def get_all_properties(project_id: int, page: schemas.PaginatedSchema): if len(properties) == 0: return {"total": 0, "list": []} total = properties[0]["total"] + properties = helper.list_to_camel_case(properties) for i, p in 
import re  # needed by the module-level patterns below

# Matches a type that starts with a Nullable(...) / LowCardinality(...) wrapper;
# group(2) is the wrapped inner type. Compiled once instead of on every call.
_CH_WRAPPER_RE = re.compile(r'(Nullable|LowCardinality)\((.*)\)')

# Int8 ... Int256 and UInt8 ... UInt256 (checked against the lowercased name).
_CH_INT_RE = re.compile(r'^(u?int)(8|16|32|64|128|256)$')

# Float32 / Float64 (checked against the lowercased name).
_CH_FLOAT_RE = re.compile(r'^float(32|64)$')

# (lowercase prefix, simplified category). The "date" prefix also covers
# DateTime and DateTime64, so no separate "datetime" entry is required.
_CH_PREFIX_CATEGORIES = (
    ("decimal", "decimal"),
    ("date", "datetime"),
    ("string", "string"),
    ("fixedstring", "string"),
    ("uuid", "uuid"),
    ("enum8", "enum"),
    ("enum16", "enum"),
    ("array", "array"),
    ("tuple", "tuple"),
    ("map", "map"),
    ("nested", "nested"),
)


def simplify_clickhouse_type(ch_type: str) -> str:
    """Map a ClickHouse type name to a broad category.

    Leading ``Nullable(...)`` and ``LowCardinality(...)`` wrappers are removed
    (repeatedly, so nested wrappers are handled), the remainder is lowercased
    and then classified as one of: ``int``, ``float``, ``decimal``,
    ``datetime``, ``string``, ``uuid``, ``enum``, ``array``, ``tuple``,
    ``map`` or ``nested``.

    Args:
        ch_type: ClickHouse type name, e.g. ``"LowCardinality(Nullable(Int32))"``.

    Returns:
        The simplified category, or the unwrapped type name in lowercase when
        it does not belong to any known category.
    """
    # Peel wrappers one layer at a time: LowCardinality(Nullable(X)) -> X.
    while (wrapped := _CH_WRAPPER_RE.match(ch_type)) is not None:
        ch_type = wrapped.group(2)

    normalized_type = ch_type.lower()

    if _CH_INT_RE.match(normalized_type):
        return "int"
    if _CH_FLOAT_RE.match(normalized_type):
        return "float"

    for prefix, category in _CH_PREFIX_CATEGORIES:
        if normalized_type.startswith(prefix):
            return category

    # Unknown type: hand back the lowercased name unchanged.
    return normalized_type


def simplify_clickhouse_types(ch_types: list[str]) -> list[str]:
    """Simplify a list of ClickHouse types into unique categories.

    Each entry is passed through `simplify_clickhouse_type`; duplicates are
    dropped while keeping the order in which each category first appears, so
    the result is deterministic for a given input.

    Args:
        ch_types: ClickHouse type names. ``None`` or an empty list yields ``[]``.

    Returns:
        The distinct simplified categories, in first-seen order.
    """
    if not ch_types:
        return []
    # dict.fromkeys de-duplicates while preserving insertion order, unlike set().
    return list(dict.fromkeys(simplify_clickhouse_type(t) for t in ch_types))