refactor(chalice): changed predefined properties types handling

refactor(DB): changed predefined properties types
This commit is contained in:
Taha Yassine Kraiem 2025-04-29 16:38:23 +01:00 committed by Kraiem Taha Yassine
parent 812983f97c
commit 39d3d8db4c
5 changed files with 63 additions and 267 deletions

View file

@ -1,32 +1,62 @@
import re
from functools import cache
import schemas import schemas
from chalicelib.utils import helper, exp_ch_helper from chalicelib.utils import helper, exp_ch_helper
from chalicelib.utils.ch_client import ClickHouseClient from chalicelib.utils.ch_client import ClickHouseClient
PREDEFINED_PROPERTY_TYPES = {
@cache "label": "String",
def get_predefined_property_types(): "hesitation_time": "UInt32",
with ClickHouseClient() as ch_client: "name": "String",
properties_type = ch_client.execute("""\ "payload": "String",
SELECT type "level": "Enum8",
FROM system.columns "source": "Enum8",
WHERE database = 'product_analytics' "message": "String",
AND table = 'events' "error_id": "String",
AND name = '$properties';""") "duration": "UInt16",
if len(properties_type) == 0: "context": "Enum8",
return {} "url_host": "String",
properties_type = properties_type[0]["type"] "url_path": "String",
"url_hostpath": "String",
pattern = r'(\w+)\s+(Enum8\([^\)]+\)|[A-Za-z0-9_]+(?:\([^\)]+\))?)' "request_start": "UInt16",
"response_start": "UInt16",
# Find all matches "response_end": "UInt16",
matches = re.findall(pattern, properties_type) "dom_content_loaded_event_start": "UInt16",
"dom_content_loaded_event_end": "UInt16",
# Create a dictionary of attribute names and types "load_event_start": "UInt16",
attributes = {match[0]: match[1] for match in matches} "load_event_end": "UInt16",
return attributes "first_paint": "UInt16",
"first_contentful_paint_time": "UInt16",
"speed_index": "UInt16",
"visually_complete": "UInt16",
"time_to_interactive": "UInt16",
"ttfb": "UInt16",
"ttlb": "UInt16",
"response_time": "UInt16",
"dom_building_time": "UInt16",
"dom_content_loaded_event_time": "UInt16",
"load_event_time": "UInt16",
"min_fps": "UInt8",
"avg_fps": "UInt8",
"max_fps": "UInt8",
"min_cpu": "UInt8",
"avg_cpu": "UInt8",
"max_cpu": "UInt8",
"min_total_js_heap_size": "UInt64",
"avg_total_js_heap_size": "UInt64",
"max_total_js_heap_size": "UInt64",
"min_used_js_heap_size": "UInt64",
"avg_used_js_heap_size": "UInt64",
"max_used_js_heap_size": "UInt64",
"method": "Enum8",
"status": "UInt16",
"success": "UInt8",
"request_body": "String",
"response_body": "String",
"transfer_size": "UInt32",
"selector": "String",
"normalized_x": "Float32",
"normalized_y": "Float32",
"message_id": "UInt64"
}
def get_all_properties(project_id: int, page: schemas.PaginatedSchema): def get_all_properties(project_id: int, page: schemas.PaginatedSchema):
@ -49,12 +79,11 @@ def get_all_properties(project_id: int, page: schemas.PaginatedSchema):
return {"total": 0, "list": []} return {"total": 0, "list": []}
total = properties[0]["total"] total = properties[0]["total"]
properties = helper.list_to_camel_case(properties) properties = helper.list_to_camel_case(properties)
predefined_properties = get_predefined_property_types()
for i, p in enumerate(properties): for i, p in enumerate(properties):
p["id"] = f"prop_{i}" p["id"] = f"prop_{i}"
p["_foundInPredefinedList"] = False p["_foundInPredefinedList"] = False
if p["name"] in predefined_properties: if p["name"] in PREDEFINED_PROPERTY_TYPES:
p["dataType"] = exp_ch_helper.simplify_clickhouse_type(predefined_properties[p["name"]]) p["dataType"] = exp_ch_helper.simplify_clickhouse_type(PREDEFINED_PROPERTY_TYPES[p["name"]])
p["_foundInPredefinedList"] = True p["_foundInPredefinedList"] = True
p["possibleTypes"] = list(set(exp_ch_helper.simplify_clickhouse_types(p["possibleTypes"]))) p["possibleTypes"] = list(set(exp_ch_helper.simplify_clickhouse_types(p["possibleTypes"])))
p.pop("total") p.pop("total")
@ -77,12 +106,11 @@ def get_event_properties(project_id: int, event_name):
parameters={"project_id": project_id, "event_name": event_name}) parameters={"project_id": project_id, "event_name": event_name})
properties = ch_client.execute(r) properties = ch_client.execute(r)
properties = helper.list_to_camel_case(properties) properties = helper.list_to_camel_case(properties)
predefined_properties = get_predefined_property_types()
for i, p in enumerate(properties): for i, p in enumerate(properties):
p["id"] = f"prop_{i}" p["id"] = f"prop_{i}"
p["_foundInPredefinedList"] = False p["_foundInPredefinedList"] = False
if p["name"] in predefined_properties: if p["name"] in PREDEFINED_PROPERTY_TYPES:
p["dataType"] = exp_ch_helper.simplify_clickhouse_type(predefined_properties[p["name"]]) p["dataType"] = exp_ch_helper.simplify_clickhouse_type(PREDEFINED_PROPERTY_TYPES[p["name"]])
p["_foundInPredefinedList"] = True p["_foundInPredefinedList"] = True
p["possibleTypes"] = list(set(exp_ch_helper.simplify_clickhouse_types(p["possibleTypes"]))) p["possibleTypes"] = list(set(exp_ch_helper.simplify_clickhouse_types(p["possibleTypes"])))

View file

@ -1,65 +1,5 @@
CREATE OR REPLACE FUNCTION openreplay_version AS() -> 'v1.23.0-ee'; CREATE OR REPLACE FUNCTION openreplay_version AS() -> 'v1.23.0-ee';
SET allow_experimental_json_type = 1;
SET enable_json_type = 1;
ALTER TABLE product_analytics.events
MODIFY COLUMN `$properties` JSON(
max_dynamic_paths=0,
label String ,
hesitation_time UInt32 ,
name String ,
payload String ,
level Enum8 ('info'=0, 'error'=1),
source Enum8 ('js_exception'=0, 'bugsnag'=1, 'cloudwatch'=2, 'datadog'=3, 'elasticsearch'=4, 'newrelic'=5, 'rollbar'=6, 'sentry'=7, 'stackdriver'=8, 'sumologic'=9),
message String ,
error_id String ,
duration UInt16,
context Enum8('unknown'=0, 'self'=1, 'same-origin-ancestor'=2, 'same-origin-descendant'=3, 'same-origin'=4, 'cross-origin-ancestor'=5, 'cross-origin-descendant'=6, 'cross-origin-unreachable'=7, 'multiple-contexts'=8),
url_host String ,
url_path String ,
url_hostpath String ,
request_start UInt16 ,
response_start UInt16 ,
response_end UInt16 ,
dom_content_loaded_event_start UInt16 ,
dom_content_loaded_event_end UInt16 ,
load_event_start UInt16 ,
load_event_end UInt16 ,
first_paint UInt16 ,
first_contentful_paint_time UInt16 ,
speed_index UInt16 ,
visually_complete UInt16 ,
time_to_interactive UInt16,
ttfb UInt16,
ttlb UInt16,
response_time UInt16,
dom_building_time UInt16,
dom_content_loaded_event_time UInt16,
load_event_time UInt16,
min_fps UInt8,
avg_fps UInt8,
max_fps UInt8,
min_cpu UInt8,
avg_cpu UInt8,
max_cpu UInt8,
min_total_js_heap_size UInt64,
avg_total_js_heap_size UInt64,
max_total_js_heap_size UInt64,
min_used_js_heap_size UInt64,
avg_used_js_heap_size UInt64,
max_used_js_heap_size UInt64,
method Enum8('GET' = 0, 'HEAD' = 1, 'POST' = 2, 'PUT' = 3, 'DELETE' = 4, 'CONNECT' = 5, 'OPTIONS' = 6, 'TRACE' = 7, 'PATCH' = 8),
status UInt16,
success UInt8,
request_body String,
response_body String,
transfer_size UInt32,
selector String,
normalized_x Float32,
normalized_y Float32,
message_id UInt64
) DEFAULT '{}' COMMENT 'these properties belongs to the auto-captured events';
DROP TABLE IF EXISTS product_analytics.all_events; DROP TABLE IF EXISTS product_analytics.all_events;
CREATE TABLE IF NOT EXISTS product_analytics.all_events CREATE TABLE IF NOT EXISTS product_analytics.all_events
( (

View file

@ -431,62 +431,7 @@ CREATE TABLE IF NOT EXISTS product_analytics.events
"$source" LowCardinality(String) DEFAULT '' COMMENT 'the name of the integration that sent the event', "$source" LowCardinality(String) DEFAULT '' COMMENT 'the name of the integration that sent the event',
"$duration_s" UInt16 DEFAULT 0 COMMENT 'the duration from session-start in seconds', "$duration_s" UInt16 DEFAULT 0 COMMENT 'the duration from session-start in seconds',
properties JSON DEFAULT '{}', properties JSON DEFAULT '{}',
"$properties" JSON( "$properties" JSON DEFAULT '{}' COMMENT 'these properties belongs to the auto-captured events',
max_dynamic_paths=0,
label String ,
hesitation_time UInt32 ,
name String ,
payload String ,
level Enum8 ('info'=0, 'error'=1),
source Enum8 ('js_exception'=0, 'bugsnag'=1, 'cloudwatch'=2, 'datadog'=3, 'elasticsearch'=4, 'newrelic'=5, 'rollbar'=6, 'sentry'=7, 'stackdriver'=8, 'sumologic'=9),
message String ,
error_id String ,
duration UInt16,
context Enum8('unknown'=0, 'self'=1, 'same-origin-ancestor'=2, 'same-origin-descendant'=3, 'same-origin'=4, 'cross-origin-ancestor'=5, 'cross-origin-descendant'=6, 'cross-origin-unreachable'=7, 'multiple-contexts'=8),
url_host String ,
url_path String ,
url_hostpath String ,
request_start UInt16 ,
response_start UInt16 ,
response_end UInt16 ,
dom_content_loaded_event_start UInt16 ,
dom_content_loaded_event_end UInt16 ,
load_event_start UInt16 ,
load_event_end UInt16 ,
first_paint UInt16 ,
first_contentful_paint_time UInt16 ,
speed_index UInt16 ,
visually_complete UInt16 ,
time_to_interactive UInt16,
ttfb UInt16,
ttlb UInt16,
response_time UInt16,
dom_building_time UInt16,
dom_content_loaded_event_time UInt16,
load_event_time UInt16,
min_fps UInt8,
avg_fps UInt8,
max_fps UInt8,
min_cpu UInt8,
avg_cpu UInt8,
max_cpu UInt8,
min_total_js_heap_size UInt64,
avg_total_js_heap_size UInt64,
max_total_js_heap_size UInt64,
min_used_js_heap_size UInt64,
avg_used_js_heap_size UInt64,
max_used_js_heap_size UInt64,
method Enum8('GET' = 0, 'HEAD' = 1, 'POST' = 2, 'PUT' = 3, 'DELETE' = 4, 'CONNECT' = 5, 'OPTIONS' = 6, 'TRACE' = 7, 'PATCH' = 8),
status UInt16,
success UInt8,
request_body String,
response_body String,
transfer_size UInt32,
selector String,
normalized_x Float32,
normalized_y Float32,
message_id UInt64
) DEFAULT '{}' COMMENT 'these properties belongs to the auto-captured events',
description String DEFAULT '', description String DEFAULT '',
group_id1 Array(String) DEFAULT [], group_id1 Array(String) DEFAULT [],
group_id2 Array(String) DEFAULT [], group_id2 Array(String) DEFAULT [],
@ -846,8 +791,7 @@ CREATE TABLE IF NOT EXISTS product_analytics.property_values_samples
ENGINE = ReplacingMergeTree(_timestamp) ENGINE = ReplacingMergeTree(_timestamp)
ORDER BY (project_id, property_name, is_event_property); ORDER BY (project_id, property_name, is_event_property);
-- Incremental materialized view to get random examples of property values using $properties & properties -- Incremental materialized view to get random examples of property values using $properties & properties
CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mv CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mv REFRESH EVERY 30 HOUR TO product_analytics.property_values_samples AS
REFRESH EVERY 30 HOUR TO product_analytics.property_values_samples AS
SELECT project_id, SELECT project_id,
property_name, property_name,
TRUE AS is_event_property, TRUE AS is_event_property,

View file

@ -12,66 +12,6 @@ CREATE TABLE IF NOT EXISTS experimental.user_viewed_sessions
ORDER BY (project_id, user_id, session_id) ORDER BY (project_id, user_id, session_id)
TTL _timestamp + INTERVAL 3 MONTH; TTL _timestamp + INTERVAL 3 MONTH;
SET allow_experimental_json_type = 1;
SET enable_json_type = 1;
ALTER TABLE product_analytics.events
MODIFY COLUMN `$properties` JSON(
max_dynamic_paths=0,
label String ,
hesitation_time UInt32 ,
name String ,
payload String ,
level Enum8 ('info'=0, 'error'=1),
source Enum8 ('js_exception'=0, 'bugsnag'=1, 'cloudwatch'=2, 'datadog'=3, 'elasticsearch'=4, 'newrelic'=5, 'rollbar'=6, 'sentry'=7, 'stackdriver'=8, 'sumologic'=9),
message String ,
error_id String ,
duration UInt16,
context Enum8('unknown'=0, 'self'=1, 'same-origin-ancestor'=2, 'same-origin-descendant'=3, 'same-origin'=4, 'cross-origin-ancestor'=5, 'cross-origin-descendant'=6, 'cross-origin-unreachable'=7, 'multiple-contexts'=8),
url_host String ,
url_path String ,
url_hostpath String ,
request_start UInt16 ,
response_start UInt16 ,
response_end UInt16 ,
dom_content_loaded_event_start UInt16 ,
dom_content_loaded_event_end UInt16 ,
load_event_start UInt16 ,
load_event_end UInt16 ,
first_paint UInt16 ,
first_contentful_paint_time UInt16 ,
speed_index UInt16 ,
visually_complete UInt16 ,
time_to_interactive UInt16,
ttfb UInt16,
ttlb UInt16,
response_time UInt16,
dom_building_time UInt16,
dom_content_loaded_event_time UInt16,
load_event_time UInt16,
min_fps UInt8,
avg_fps UInt8,
max_fps UInt8,
min_cpu UInt8,
avg_cpu UInt8,
max_cpu UInt8,
min_total_js_heap_size UInt64,
avg_total_js_heap_size UInt64,
max_total_js_heap_size UInt64,
min_used_js_heap_size UInt64,
avg_used_js_heap_size UInt64,
max_used_js_heap_size UInt64,
method Enum8('GET' = 0, 'HEAD' = 1, 'POST' = 2, 'PUT' = 3, 'DELETE' = 4, 'CONNECT' = 5, 'OPTIONS' = 6, 'TRACE' = 7, 'PATCH' = 8),
status UInt16,
success UInt8,
request_body String,
response_body String,
transfer_size UInt32,
selector String,
normalized_x Float32,
normalized_y Float32,
message_id UInt64
) DEFAULT '{}' COMMENT 'these properties belongs to the auto-captured events';
DROP TABLE IF EXISTS product_analytics.all_events; DROP TABLE IF EXISTS product_analytics.all_events;
CREATE TABLE IF NOT EXISTS product_analytics.all_events CREATE TABLE IF NOT EXISTS product_analytics.all_events
( (

View file

@ -330,62 +330,7 @@ CREATE TABLE IF NOT EXISTS product_analytics.events
"$source" LowCardinality(String) DEFAULT '' COMMENT 'the name of the integration that sent the event', "$source" LowCardinality(String) DEFAULT '' COMMENT 'the name of the integration that sent the event',
"$duration_s" UInt16 DEFAULT 0 COMMENT 'the duration from session-start in seconds', "$duration_s" UInt16 DEFAULT 0 COMMENT 'the duration from session-start in seconds',
properties JSON DEFAULT '{}', properties JSON DEFAULT '{}',
"$properties" JSON( "$properties" JSON DEFAULT '{}' COMMENT 'these properties belongs to the auto-captured events',
max_dynamic_paths=0,
label String ,
hesitation_time UInt32 ,
name String ,
payload String ,
level Enum8 ('info'=0, 'error'=1),
source Enum8 ('js_exception'=0, 'bugsnag'=1, 'cloudwatch'=2, 'datadog'=3, 'elasticsearch'=4, 'newrelic'=5, 'rollbar'=6, 'sentry'=7, 'stackdriver'=8, 'sumologic'=9),
message String ,
error_id String ,
duration UInt16,
context Enum8('unknown'=0, 'self'=1, 'same-origin-ancestor'=2, 'same-origin-descendant'=3, 'same-origin'=4, 'cross-origin-ancestor'=5, 'cross-origin-descendant'=6, 'cross-origin-unreachable'=7, 'multiple-contexts'=8),
url_host String ,
url_path String ,
url_hostpath String ,
request_start UInt16 ,
response_start UInt16 ,
response_end UInt16 ,
dom_content_loaded_event_start UInt16 ,
dom_content_loaded_event_end UInt16 ,
load_event_start UInt16 ,
load_event_end UInt16 ,
first_paint UInt16 ,
first_contentful_paint_time UInt16 ,
speed_index UInt16 ,
visually_complete UInt16 ,
time_to_interactive UInt16,
ttfb UInt16,
ttlb UInt16,
response_time UInt16,
dom_building_time UInt16,
dom_content_loaded_event_time UInt16,
load_event_time UInt16,
min_fps UInt8,
avg_fps UInt8,
max_fps UInt8,
min_cpu UInt8,
avg_cpu UInt8,
max_cpu UInt8,
min_total_js_heap_size UInt64,
avg_total_js_heap_size UInt64,
max_total_js_heap_size UInt64,
min_used_js_heap_size UInt64,
avg_used_js_heap_size UInt64,
max_used_js_heap_size UInt64,
method Enum8('GET' = 0, 'HEAD' = 1, 'POST' = 2, 'PUT' = 3, 'DELETE' = 4, 'CONNECT' = 5, 'OPTIONS' = 6, 'TRACE' = 7, 'PATCH' = 8),
status UInt16,
success UInt8,
request_body String,
response_body String,
transfer_size UInt32,
selector String,
normalized_x Float32,
normalized_y Float32,
message_id UInt64
) DEFAULT '{}' COMMENT 'these properties belongs to the auto-captured events',
description String DEFAULT '', description String DEFAULT '',
group_id1 Array(String) DEFAULT [], group_id1 Array(String) DEFAULT [],
group_id2 Array(String) DEFAULT [], group_id2 Array(String) DEFAULT [],
@ -745,8 +690,7 @@ CREATE TABLE IF NOT EXISTS product_analytics.property_values_samples
ENGINE = ReplacingMergeTree(_timestamp) ENGINE = ReplacingMergeTree(_timestamp)
ORDER BY (project_id, property_name, is_event_property); ORDER BY (project_id, property_name, is_event_property);
-- Incremental materialized view to get random examples of property values using $properties & properties -- Incremental materialized view to get random examples of property values using $properties & properties
CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mv CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mv REFRESH EVERY 30 HOUR TO product_analytics.property_values_samples AS
REFRESH EVERY 30 HOUR TO product_analytics.property_values_samples AS
SELECT project_id, SELECT project_id,
property_name, property_name,
TRUE AS is_event_property, TRUE AS is_event_property,