diff --git a/api/chalicelib/core/product_analytics/properties.py b/api/chalicelib/core/product_analytics/properties.py index 635ced5f4..a34810d69 100644 --- a/api/chalicelib/core/product_analytics/properties.py +++ b/api/chalicelib/core/product_analytics/properties.py @@ -1,32 +1,62 @@ -import re -from functools import cache - import schemas from chalicelib.utils import helper, exp_ch_helper from chalicelib.utils.ch_client import ClickHouseClient - -@cache -def get_predefined_property_types(): - with ClickHouseClient() as ch_client: - properties_type = ch_client.execute("""\ - SELECT type - FROM system.columns - WHERE database = 'product_analytics' - AND table = 'events' - AND name = '$properties';""") - if len(properties_type) == 0: - return {} - properties_type = properties_type[0]["type"] - - pattern = r'(\w+)\s+(Enum8\([^\)]+\)|[A-Za-z0-9_]+(?:\([^\)]+\))?)' - - # Find all matches - matches = re.findall(pattern, properties_type) - - # Create a dictionary of attribute names and types - attributes = {match[0]: match[1] for match in matches} - return attributes +PREDEFINED_PROPERTY_TYPES = { + "label": "String", + "hesitation_time": "UInt32", + "name": "String", + "payload": "String", + "level": "Enum8", + "source": "Enum8", + "message": "String", + "error_id": "String", + "duration": "UInt16", + "context": "Enum8", + "url_host": "String", + "url_path": "String", + "url_hostpath": "String", + "request_start": "UInt16", + "response_start": "UInt16", + "response_end": "UInt16", + "dom_content_loaded_event_start": "UInt16", + "dom_content_loaded_event_end": "UInt16", + "load_event_start": "UInt16", + "load_event_end": "UInt16", + "first_paint": "UInt16", + "first_contentful_paint_time": "UInt16", + "speed_index": "UInt16", + "visually_complete": "UInt16", + "time_to_interactive": "UInt16", + "ttfb": "UInt16", + "ttlb": "UInt16", + "response_time": "UInt16", + "dom_building_time": "UInt16", + "dom_content_loaded_event_time": "UInt16", + "load_event_time": "UInt16", + 
"min_fps": "UInt8", + "avg_fps": "UInt8", + "max_fps": "UInt8", + "min_cpu": "UInt8", + "avg_cpu": "UInt8", + "max_cpu": "UInt8", + "min_total_js_heap_size": "UInt64", + "avg_total_js_heap_size": "UInt64", + "max_total_js_heap_size": "UInt64", + "min_used_js_heap_size": "UInt64", + "avg_used_js_heap_size": "UInt64", + "max_used_js_heap_size": "UInt64", + "method": "Enum8", + "status": "UInt16", + "success": "UInt8", + "request_body": "String", + "response_body": "String", + "transfer_size": "UInt32", + "selector": "String", + "normalized_x": "Float32", + "normalized_y": "Float32", + "message_id": "UInt64" +} def get_all_properties(project_id: int, page: schemas.PaginatedSchema): @@ -49,12 +79,11 @@ def get_all_properties(project_id: int, page: schemas.PaginatedSchema): return {"total": 0, "list": []} total = properties[0]["total"] properties = helper.list_to_camel_case(properties) - predefined_properties = get_predefined_property_types() for i, p in enumerate(properties): p["id"] = f"prop_{i}" p["_foundInPredefinedList"] = False - if p["name"] in predefined_properties: - p["dataType"] = exp_ch_helper.simplify_clickhouse_type(predefined_properties[p["name"]]) + if p["name"] in PREDEFINED_PROPERTY_TYPES: + p["dataType"] = exp_ch_helper.simplify_clickhouse_type(PREDEFINED_PROPERTY_TYPES[p["name"]]) p["_foundInPredefinedList"] = True p["possibleTypes"] = list(set(exp_ch_helper.simplify_clickhouse_types(p["possibleTypes"]))) p.pop("total") @@ -77,12 +106,11 @@ def get_event_properties(project_id: int, event_name): parameters={"project_id": project_id, "event_name": event_name}) properties = ch_client.execute(r) properties = helper.list_to_camel_case(properties) - predefined_properties = get_predefined_property_types() for i, p in enumerate(properties): p["id"] = f"prop_{i}" p["_foundInPredefinedList"] = False - if p["name"] in predefined_properties: - p["dataType"] = exp_ch_helper.simplify_clickhouse_type(predefined_properties[p["name"]]) + if p["name"] in 
PREDEFINED_PROPERTY_TYPES: + p["dataType"] = exp_ch_helper.simplify_clickhouse_type(PREDEFINED_PROPERTY_TYPES[p["name"]]) p["_foundInPredefinedList"] = True p["possibleTypes"] = list(set(exp_ch_helper.simplify_clickhouse_types(p["possibleTypes"]))) diff --git a/ee/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql b/ee/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql index dcd616e5f..889bb4d49 100644 --- a/ee/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql +++ b/ee/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql @@ -1,65 +1,5 @@ CREATE OR REPLACE FUNCTION openreplay_version AS() -> 'v1.23.0-ee'; -SET allow_experimental_json_type = 1; -SET enable_json_type = 1; -ALTER TABLE product_analytics.events - MODIFY COLUMN `$properties` JSON( -max_dynamic_paths=0, -label String , -hesitation_time UInt32 , -name String , -payload String , -level Enum8 ('info'=0, 'error'=1), -source Enum8 ('js_exception'=0, 'bugsnag'=1, 'cloudwatch'=2, 'datadog'=3, 'elasticsearch'=4, 'newrelic'=5, 'rollbar'=6, 'sentry'=7, 'stackdriver'=8, 'sumologic'=9), -message String , -error_id String , -duration UInt16, -context Enum8('unknown'=0, 'self'=1, 'same-origin-ancestor'=2, 'same-origin-descendant'=3, 'same-origin'=4, 'cross-origin-ancestor'=5, 'cross-origin-descendant'=6, 'cross-origin-unreachable'=7, 'multiple-contexts'=8), -url_host String , -url_path String , -url_hostpath String , -request_start UInt16 , -response_start UInt16 , -response_end UInt16 , -dom_content_loaded_event_start UInt16 , -dom_content_loaded_event_end UInt16 , -load_event_start UInt16 , -load_event_end UInt16 , -first_paint UInt16 , -first_contentful_paint_time UInt16 , -speed_index UInt16 , -visually_complete UInt16 , -time_to_interactive UInt16, -ttfb UInt16, -ttlb UInt16, -response_time UInt16, -dom_building_time UInt16, -dom_content_loaded_event_time UInt16, -load_event_time UInt16, -min_fps UInt8, -avg_fps UInt8, -max_fps UInt8, -min_cpu UInt8, -avg_cpu UInt8, -max_cpu UInt8, 
-min_total_js_heap_size UInt64, -avg_total_js_heap_size UInt64, -max_total_js_heap_size UInt64, -min_used_js_heap_size UInt64, -avg_used_js_heap_size UInt64, -max_used_js_heap_size UInt64, -method Enum8('GET' = 0, 'HEAD' = 1, 'POST' = 2, 'PUT' = 3, 'DELETE' = 4, 'CONNECT' = 5, 'OPTIONS' = 6, 'TRACE' = 7, 'PATCH' = 8), -status UInt16, -success UInt8, -request_body String, -response_body String, -transfer_size UInt32, -selector String, -normalized_x Float32, -normalized_y Float32, -message_id UInt64 -) DEFAULT '{}' COMMENT 'these properties belongs to the auto-captured events'; - DROP TABLE IF EXISTS product_analytics.all_events; CREATE TABLE IF NOT EXISTS product_analytics.all_events ( diff --git a/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql b/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql index 5f6a06511..f1c2fbb66 100644 --- a/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql +++ b/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql @@ -431,62 +431,7 @@ CREATE TABLE IF NOT EXISTS product_analytics.events "$source" LowCardinality(String) DEFAULT '' COMMENT 'the name of the integration that sent the event', "$duration_s" UInt16 DEFAULT 0 COMMENT 'the duration from session-start in seconds', properties JSON DEFAULT '{}', - "$properties" JSON( -max_dynamic_paths=0, -label String , -hesitation_time UInt32 , -name String , -payload String , -level Enum8 ('info'=0, 'error'=1), -source Enum8 ('js_exception'=0, 'bugsnag'=1, 'cloudwatch'=2, 'datadog'=3, 'elasticsearch'=4, 'newrelic'=5, 'rollbar'=6, 'sentry'=7, 'stackdriver'=8, 'sumologic'=9), -message String , -error_id String , -duration UInt16, -context Enum8('unknown'=0, 'self'=1, 'same-origin-ancestor'=2, 'same-origin-descendant'=3, 'same-origin'=4, 'cross-origin-ancestor'=5, 'cross-origin-descendant'=6, 'cross-origin-unreachable'=7, 'multiple-contexts'=8), -url_host String , -url_path String , -url_hostpath String , -request_start UInt16 , -response_start 
UInt16 , -response_end UInt16 , -dom_content_loaded_event_start UInt16 , -dom_content_loaded_event_end UInt16 , -load_event_start UInt16 , -load_event_end UInt16 , -first_paint UInt16 , -first_contentful_paint_time UInt16 , -speed_index UInt16 , -visually_complete UInt16 , -time_to_interactive UInt16, -ttfb UInt16, -ttlb UInt16, -response_time UInt16, -dom_building_time UInt16, -dom_content_loaded_event_time UInt16, -load_event_time UInt16, -min_fps UInt8, -avg_fps UInt8, -max_fps UInt8, -min_cpu UInt8, -avg_cpu UInt8, -max_cpu UInt8, -min_total_js_heap_size UInt64, -avg_total_js_heap_size UInt64, -max_total_js_heap_size UInt64, -min_used_js_heap_size UInt64, -avg_used_js_heap_size UInt64, -max_used_js_heap_size UInt64, -method Enum8('GET' = 0, 'HEAD' = 1, 'POST' = 2, 'PUT' = 3, 'DELETE' = 4, 'CONNECT' = 5, 'OPTIONS' = 6, 'TRACE' = 7, 'PATCH' = 8), -status UInt16, -success UInt8, -request_body String, -response_body String, -transfer_size UInt32, -selector String, -normalized_x Float32, -normalized_y Float32, -message_id UInt64 -) DEFAULT '{}' COMMENT 'these properties belongs to the auto-captured events', + "$properties" JSON DEFAULT '{}' COMMENT 'these properties belongs to the auto-captured events', description String DEFAULT '', group_id1 Array(String) DEFAULT [], group_id2 Array(String) DEFAULT [], @@ -846,8 +791,7 @@ CREATE TABLE IF NOT EXISTS product_analytics.property_values_samples ENGINE = ReplacingMergeTree(_timestamp) ORDER BY (project_id, property_name, is_event_property); -- Incremental materialized view to get random examples of property values using $properties & properties -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mv - REFRESH EVERY 30 HOUR TO product_analytics.property_values_samples AS +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mv REFRESH EVERY 30 HOUR TO product_analytics.property_values_samples AS SELECT project_id, property_name, TRUE AS is_event_property, diff --git 
a/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql b/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql index 7bab7f7a1..1be61c988 100644 --- a/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql +++ b/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql @@ -12,66 +12,6 @@ CREATE TABLE IF NOT EXISTS experimental.user_viewed_sessions ORDER BY (project_id, user_id, session_id) TTL _timestamp + INTERVAL 3 MONTH; -SET allow_experimental_json_type = 1; -SET enable_json_type = 1; -ALTER TABLE product_analytics.events - MODIFY COLUMN `$properties` JSON( -max_dynamic_paths=0, -label String , -hesitation_time UInt32 , -name String , -payload String , -level Enum8 ('info'=0, 'error'=1), -source Enum8 ('js_exception'=0, 'bugsnag'=1, 'cloudwatch'=2, 'datadog'=3, 'elasticsearch'=4, 'newrelic'=5, 'rollbar'=6, 'sentry'=7, 'stackdriver'=8, 'sumologic'=9), -message String , -error_id String , -duration UInt16, -context Enum8('unknown'=0, 'self'=1, 'same-origin-ancestor'=2, 'same-origin-descendant'=3, 'same-origin'=4, 'cross-origin-ancestor'=5, 'cross-origin-descendant'=6, 'cross-origin-unreachable'=7, 'multiple-contexts'=8), -url_host String , -url_path String , -url_hostpath String , -request_start UInt16 , -response_start UInt16 , -response_end UInt16 , -dom_content_loaded_event_start UInt16 , -dom_content_loaded_event_end UInt16 , -load_event_start UInt16 , -load_event_end UInt16 , -first_paint UInt16 , -first_contentful_paint_time UInt16 , -speed_index UInt16 , -visually_complete UInt16 , -time_to_interactive UInt16, -ttfb UInt16, -ttlb UInt16, -response_time UInt16, -dom_building_time UInt16, -dom_content_loaded_event_time UInt16, -load_event_time UInt16, -min_fps UInt8, -avg_fps UInt8, -max_fps UInt8, -min_cpu UInt8, -avg_cpu UInt8, -max_cpu UInt8, -min_total_js_heap_size UInt64, -avg_total_js_heap_size UInt64, -max_total_js_heap_size UInt64, -min_used_js_heap_size UInt64, -avg_used_js_heap_size UInt64, -max_used_js_heap_size UInt64, -method Enum8('GET' = 
0, 'HEAD' = 1, 'POST' = 2, 'PUT' = 3, 'DELETE' = 4, 'CONNECT' = 5, 'OPTIONS' = 6, 'TRACE' = 7, 'PATCH' = 8), -status UInt16, -success UInt8, -request_body String, -response_body String, -transfer_size UInt32, -selector String, -normalized_x Float32, -normalized_y Float32, -message_id UInt64 -) DEFAULT '{}' COMMENT 'these properties belongs to the auto-captured events'; - DROP TABLE IF EXISTS product_analytics.all_events; CREATE TABLE IF NOT EXISTS product_analytics.all_events ( diff --git a/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql b/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql index 1283dc4e9..6d6c196eb 100644 --- a/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql +++ b/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql @@ -330,62 +330,7 @@ CREATE TABLE IF NOT EXISTS product_analytics.events "$source" LowCardinality(String) DEFAULT '' COMMENT 'the name of the integration that sent the event', "$duration_s" UInt16 DEFAULT 0 COMMENT 'the duration from session-start in seconds', properties JSON DEFAULT '{}', - "$properties" JSON( -max_dynamic_paths=0, -label String , -hesitation_time UInt32 , -name String , -payload String , -level Enum8 ('info'=0, 'error'=1), -source Enum8 ('js_exception'=0, 'bugsnag'=1, 'cloudwatch'=2, 'datadog'=3, 'elasticsearch'=4, 'newrelic'=5, 'rollbar'=6, 'sentry'=7, 'stackdriver'=8, 'sumologic'=9), -message String , -error_id String , -duration UInt16, -context Enum8('unknown'=0, 'self'=1, 'same-origin-ancestor'=2, 'same-origin-descendant'=3, 'same-origin'=4, 'cross-origin-ancestor'=5, 'cross-origin-descendant'=6, 'cross-origin-unreachable'=7, 'multiple-contexts'=8), -url_host String , -url_path String , -url_hostpath String , -request_start UInt16 , -response_start UInt16 , -response_end UInt16 , -dom_content_loaded_event_start UInt16 , -dom_content_loaded_event_end UInt16 , -load_event_start UInt16 , -load_event_end UInt16 , -first_paint UInt16 , -first_contentful_paint_time UInt16 , 
-speed_index UInt16 , -visually_complete UInt16 , -time_to_interactive UInt16, -ttfb UInt16, -ttlb UInt16, -response_time UInt16, -dom_building_time UInt16, -dom_content_loaded_event_time UInt16, -load_event_time UInt16, -min_fps UInt8, -avg_fps UInt8, -max_fps UInt8, -min_cpu UInt8, -avg_cpu UInt8, -max_cpu UInt8, -min_total_js_heap_size UInt64, -avg_total_js_heap_size UInt64, -max_total_js_heap_size UInt64, -min_used_js_heap_size UInt64, -avg_used_js_heap_size UInt64, -max_used_js_heap_size UInt64, -method Enum8('GET' = 0, 'HEAD' = 1, 'POST' = 2, 'PUT' = 3, 'DELETE' = 4, 'CONNECT' = 5, 'OPTIONS' = 6, 'TRACE' = 7, 'PATCH' = 8), -status UInt16, -success UInt8, -request_body String, -response_body String, -transfer_size UInt32, -selector String, -normalized_x Float32, -normalized_y Float32, -message_id UInt64 -) DEFAULT '{}' COMMENT 'these properties belongs to the auto-captured events', + "$properties" JSON DEFAULT '{}' COMMENT 'these properties belongs to the auto-captured events', description String DEFAULT '', group_id1 Array(String) DEFAULT [], group_id2 Array(String) DEFAULT [], @@ -745,8 +690,7 @@ CREATE TABLE IF NOT EXISTS product_analytics.property_values_samples ENGINE = ReplacingMergeTree(_timestamp) ORDER BY (project_id, property_name, is_event_property); -- Incremental materialized view to get random examples of property values using $properties & properties -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mv - REFRESH EVERY 30 HOUR TO product_analytics.property_values_samples AS +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mv REFRESH EVERY 30 HOUR TO product_analytics.property_values_samples AS SELECT project_id, property_name, TRUE AS is_event_property,