openreplay/api/chalicelib/utils/exp_ch_helper.py
Taha Yassine Kraiem ed39bbf1d4 fix(chalice): fixed missing timestamp in sessions replay
fix(chalice): fixed nested custom events in session replay
fix(chalice): fixed issues events in session replay
2025-05-27 12:22:36 +02:00

249 lines
7.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import logging
import math
import re
import struct
from decimal import Decimal
from typing import Union, Any
import schemas
from chalicelib.utils import sql_helper as sh
from chalicelib.utils.TimeUTC import TimeUTC
from schemas import SearchEventOperator
logger = logging.getLogger(__name__)
def get_main_events_table(timestamp=0, platform="web"):
    """Return the fully-qualified name of the main events table.

    :param timestamp: accepted for signature compatibility; not used here.
    :param platform: ``"web"`` selects the web events table, any other value
        selects the iOS events table.
    :return: the table name as ``"<database>.<table>"``.
    """
    is_web = platform == "web"
    return "product_analytics.events" if is_web else "experimental.ios_events"
def get_main_sessions_table(timestamp=0):
    """Return the fully-qualified name of the main sessions table.

    :param timestamp: accepted for signature compatibility; not used here.
    """
    table_name = "experimental.sessions"
    return table_name
def get_user_favorite_sessions_table(timestamp=0):
    """Return the fully-qualified name of the user-favorite-sessions table.

    :param timestamp: accepted for signature compatibility; not used here.
    """
    table_name = "experimental.user_favorite_sessions"
    return table_name
def get_user_viewed_sessions_table(timestamp=0):
    """Return the fully-qualified name of the user-viewed-sessions table.

    :param timestamp: accepted for signature compatibility; not used here.
    """
    table_name = "experimental.user_viewed_sessions"
    return table_name
def get_user_viewed_errors_table(timestamp=0):
    """Return the fully-qualified name of the user-viewed-errors table.

    :param timestamp: accepted for signature compatibility; not used here.
    """
    table_name = "experimental.user_viewed_errors"
    return table_name
def get_main_js_errors_sessions_table(timestamp=0):
    """Return the table holding JS errors, which is the main events table.

    :param timestamp: forwarded unchanged to ``get_main_events_table``.
    :return: whatever ``get_main_events_table`` returns for its default platform.
    """
    events_table = get_main_events_table(timestamp)
    return events_table
def get_event_type(event_type: Union[schemas.EventType, schemas.PerformanceEventType], platform="web"):
    """Translate a schema-level event/filter type into its stored event name.

    For a non-web platform the mobile mapping is consulted first; any type it
    does not contain (and every type on the web platform) is resolved through
    the web mapping.

    :param event_type: the schema enum member to translate.
    :param platform: ``"web"`` or any other value for mobile.
    :return: the upper-case event name string.
    :raises Exception: when ``event_type`` is in neither applicable mapping.
    """
    web_names = {
        schemas.EventType.CLICK: "CLICK",
        schemas.EventType.INPUT: "INPUT",
        schemas.EventType.LOCATION: "LOCATION",
        schemas.PerformanceEventType.LOCATION_DOM_COMPLETE: "LOCATION",
        schemas.PerformanceEventType.LOCATION_LARGEST_CONTENTFUL_PAINT_TIME: "LOCATION",
        schemas.PerformanceEventType.LOCATION_TTFB: "LOCATION",
        schemas.EventType.CUSTOM: "CUSTOM",
        schemas.EventType.REQUEST: "REQUEST",
        schemas.EventType.REQUEST_DETAILS: "REQUEST",
        schemas.PerformanceEventType.FETCH_FAILED: "REQUEST",
        schemas.GraphqlFilterType.GRAPHQL_NAME: "GRAPHQL",
        schemas.EventType.STATE_ACTION: "STATEACTION",
        schemas.EventType.ERROR: "ERROR",
        schemas.PerformanceEventType.LOCATION_AVG_CPU_LOAD: 'PERFORMANCE',
        schemas.PerformanceEventType.LOCATION_AVG_MEMORY_USAGE: 'PERFORMANCE',
        schemas.FetchFilterType.FETCH_URL: 'REQUEST',
        schemas.EventType.INCIDENT: "INCIDENT",
    }
    mobile_names = {
        schemas.EventType.CLICK_MOBILE: "TAP",
        schemas.EventType.INPUT_MOBILE: "INPUT",
        schemas.EventType.CUSTOM_MOBILE: "CUSTOM",
        schemas.EventType.REQUEST_MOBILE: "REQUEST",
        schemas.EventType.ERROR_MOBILE: "CRASH",
        schemas.EventType.VIEW_MOBILE: "VIEW",
        schemas.EventType.SWIPE_MOBILE: "SWIPE",
        schemas.EventType.INCIDENT: "INCIDENT"
    }

    # Mobile names take precedence, but only when the platform is not web.
    if platform != "web":
        mobile_name = mobile_names.get(event_type)
        if mobile_name is not None:
            return mobile_name

    web_name = web_names.get(event_type)
    if web_name is None:
        raise Exception(f"unsupported EventType:{event_type}")
    return web_name
# AI generated
# Wrapper types that do not change the category of the wrapped type.
_CH_WRAPPER_RE = re.compile(r'(Nullable|LowCardinality)\((.*)\)')
# Int8 ... Int256 and UInt8 ... UInt256 (already lower-cased).
_CH_INT_RE = re.compile(r'^(u?int)(8|16|32|64|128|256)$')
# Float32, Float64 or Double; the group keeps both alternatives fully anchored.
_CH_FLOAT_RE = re.compile(r'^(float(32|64)|double)$')
# Type-name prefixes that are reported as plain strings.
_CH_STRING_PREFIXES = ("string", "fixedstring", "uuid", "enum8", "enum16")
# Composite types that are reported under their own (lower-case) name.
_CH_COMPOSITE_PREFIXES = ("array", "tuple", "map", "nested")


def simplify_clickhouse_type(ch_type: str) -> str:
    """
    Simplify a ClickHouse data type name to a broader category.

    The possible categories are ``int``, ``float``, ``datetime``, ``string``,
    ``array``, ``tuple``, ``map`` and ``nested``. Decimal types are reported
    as ``float``; UUID and Enum types are reported as ``string``. A type that
    matches none of the rules is returned unchanged apart from lower-casing.

    :param ch_type: the ClickHouse type name, optionally wrapped in
        ``Nullable(...)`` and/or ``LowCardinality(...)``.
    :return: the simplified category name.
    """
    # 1) Peel off wrappers, possibly nested: "LowCardinality(Nullable(Int32))".
    wrapper = _CH_WRAPPER_RE.match(ch_type)
    while wrapper:
        ch_type = wrapper.group(2)
        wrapper = _CH_WRAPPER_RE.match(ch_type)

    # 2) Normalize (lowercase) for easier checks.
    normalized_type = ch_type.lower()

    # 3) Exact-name numeric types.
    if _CH_INT_RE.match(normalized_type):
        return "int"
    if _CH_FLOAT_RE.match(normalized_type):
        return "float"

    # 4) Prefix-based families. None of these prefixes is a prefix of another
    #    family, so the order of the checks does not matter.
    if normalized_type.startswith("decimal"):
        # Decimal(P, S) is deliberately folded into "float".
        return "float"
    if normalized_type.startswith("date"):
        # Covers Date, Date32, DateTime and DateTime64 alike.
        return "datetime"
    if normalized_type.startswith(_CH_STRING_PREFIXES):
        # String, FixedString(N), UUID, Enum8(...) and Enum16(...).
        return "string"
    for composite in _CH_COMPOSITE_PREFIXES:
        if normalized_type.startswith(composite):
            return composite

    # If we didn't match above, just return the original type in lowercase.
    return normalized_type
def simplify_clickhouse_types(ch_types: list[str]) -> list[str]:
    """
    Simplify a list of ClickHouse types and drop duplicate categories.

    Each entry is passed through `simplify_clickhouse_type`. Duplicates are
    removed while keeping the order of first appearance, so the result is
    deterministic (a plain ``set`` of strings iterates in an order that
    changes with hash randomisation between processes).

    :param ch_types: ClickHouse type names.
    :return: the distinct simplified categories, in first-seen order.
    """
    # dict keys are unique and keep insertion order.
    return list(dict.fromkeys(simplify_clickhouse_type(t) for t in ch_types))
def get_sub_condition(col_name: str, val_name: str,
                      operator: Union[schemas.SearchEventOperator, schemas.MathOperator]) -> str:
    """Build one SQL condition comparing a column with a named query parameter.

    :param col_name: column expression, inserted verbatim into the SQL text.
    :param val_name: name of the bound parameter, rendered as ``%(name)s``.
    :param operator: the PATTERN operator yields a ``match(...)`` call; any
        other operator is translated by ``sh.get_sql_operator`` and used infix.
    :return: the SQL condition fragment.
    """
    placeholder = f"%({val_name})s"
    if operator == SearchEventOperator.PATTERN:
        return f"match({col_name}, {placeholder})"
    sql_operator = sh.get_sql_operator(operator)
    return f"{col_name} {sql_operator} {placeholder}"
def get_col_cast(data_type: schemas.PropertyType, value: Any) -> str:
    """Return the ClickHouse type name a property column should be cast to.

    :param data_type: declared type of the property.
    :param value: the value(s) being compared; for a list only the first
        element is inspected.
    :return: ``""`` when there is no value (``None`` or an empty sized value);
        for INT/FLOAT properties, the type chosen by ``best_clickhouse_type``
        for the value; otherwise ``data_type.capitalize()``.
    :raises TypeError: propagated from ``best_clickhouse_type`` when a numeric
        property carries a value of an unsupported Python type.
    """
    if value is None:
        return ""
    # Plain numbers have no len(); only sized values (list, str, ...) can be
    # empty. Calling len() unconditionally crashed on scalar int/float input.
    if hasattr(value, "__len__") and len(value) == 0:
        return ""
    if isinstance(value, list):
        value = value[0]
    if data_type in (schemas.PropertyType.INT, schemas.PropertyType.FLOAT):
        return best_clickhouse_type(value)
    # NOTE(review): presumably PropertyType is a str-based enum, so this yields
    # names such as "String" - confirm against schemas.
    return data_type.capitalize()
# (type_name, minimum, maximum) ordered by increasing size
_INT_RANGES = [
    ("Int8", -128, 127),
    ("UInt8", 0, 255),
    ("Int16", -32_768, 32_767),
    ("UInt16", 0, 65_535),
    ("Int32", -2_147_483_648, 2_147_483_647),
    ("UInt32", 0, 4_294_967_295),
    ("Int64", -9_223_372_036_854_775_808, 9_223_372_036_854_775_807),
    ("UInt64", 0, 18_446_744_073_709_551_615),
    ("Int128", -(2 ** 127), 2 ** 127 - 1),
    ("UInt128", 0, 2 ** 128 - 1),
    ("Int256", -(2 ** 255), 2 ** 255 - 1),
    ("UInt256", 0, 2 ** 256 - 1),
]

# (type_name, maximum number of significant digits) ordered by increasing size
_DECIMAL_PRECISIONS = [
    ("Decimal32", 9),
    ("Decimal64", 18),
    ("Decimal128", 38),
]


def best_clickhouse_type(value):
    """
    Return the most compact ClickHouse numeric type that can store *value* loss-lessly.

    :param value: a ``bool``, ``int``, ``decimal.Decimal`` or ``float``.
    :return: a ClickHouse type name: ``Int8`` ... ``UInt256`` for integers,
        ``Decimal32`` ... ``Decimal256`` for finite decimals, ``Float32`` or
        ``Float64`` for floats and for non-finite decimals.
    :raises TypeError: when *value* is of any other type.
    """
    # Treat bool like tiny int
    if isinstance(value, bool):
        value = int(value)

    # --- Integers ---
    if isinstance(value, int):
        for name, lo, hi in _INT_RANGES:
            if lo <= value <= hi:
                return name
        # Wider than 256 bits: no ClickHouse integer can hold it, so fall back
        # (best effort) to the widest type of the matching sign.
        return "Int256" if value < 0 else "UInt256"

    # --- Decimal.Decimal (exact) ---
    if isinstance(value, Decimal):
        if not value.is_finite():
            # NaN / Infinity cannot be stored in a Decimal column.
            return "Float64"
        _, digits, exponent = value.as_tuple()
        # A positive exponent adds trailing integer digits (1E+20 needs 21),
        # and the precision can never be smaller than the scale (0.001 -> 3).
        precision = max(len(digits) + max(exponent, 0), -exponent)
        for name, max_digits in _DECIMAL_PRECISIONS:
            if precision <= max_digits:
                return name
        return "Decimal256"

    # --- Floats ---
    if isinstance(value, float):
        if not math.isfinite(value):
            return "Float64"  # inf / nan -> always Float64
        try:
            # Round-trip through a 32-bit float and compare with the original.
            as_float32 = struct.unpack("f", struct.pack("f", value))[0]
        except OverflowError:
            # Finite, but outside the float32 range (e.g. 1e300).
            return "Float64"
        return "Float32" if as_float32 == value else "Float64"

    raise TypeError(f"Unsupported type: {type(value).__name__}")
def explode_dproperties(rows):
    """Flatten each row's ``"$properties"`` mapping into the row itself.

    Every entry of *rows* is replaced, in place, by a new dict holding the
    row's own keys plus the keys of its ``"$properties"`` mapping (property
    values win on a name clash), with the ``"$properties"`` key removed. The
    original row dicts are not modified.

    :param rows: list of dicts, each containing a ``"$properties"`` mapping.
    :return: the same list object, now holding the flattened rows.
    :raises KeyError: when a row has no ``"$properties"`` key.
    """
    for position, row in enumerate(rows):
        flattened = {**row, **row["$properties"]}
        flattened.pop("$properties")
        rows[position] = flattened
    return rows
def add_timestamp(rows):
    """Add a ``"timestamp"`` entry to every row, derived from ``"createdAt"``.

    Each row is modified in place: its ``"createdAt"`` value is converted with
    ``TimeUTC.datetime_to_timestamp`` and stored under ``"timestamp"``.

    :param rows: iterable of dict-like rows that each hold a ``"createdAt"`` value.
    :return: the *rows* object that was passed in.
    :raises KeyError: when a row has no ``"createdAt"`` key.
    """
    for entry in rows:
        created_at = entry["createdAt"]
        entry["timestamp"] = TimeUTC.datetime_to_timestamp(created_at)
    return rows