diff --git a/ee/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql b/ee/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql index fea95d46b..121237e4f 100644 --- a/ee/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql +++ b/ee/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql @@ -64,16 +64,14 @@ CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_properties_extrac SELECT project_id, `$event_name` AS event_name, property_name, - JSONType(JSONExtractRaw(toString(`$properties`), property_name)) AS value_type + toString(JSONType(JSONExtractRaw(toString(`$properties`), property_name))) AS value_type FROM product_analytics.events - ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name; - -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_cproperties_extractor - TO product_analytics.event_properties AS + ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name +UNION DISTINCT SELECT project_id, `$event_name` AS event_name, property_name, - JSONType(JSONExtractRaw(toString(`properties`), property_name)) AS value_type + toString(JSONType(JSONExtractRaw(toString(`properties`), property_name))) AS value_type FROM product_analytics.events ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name; @@ -118,10 +116,8 @@ FROM product_analytics.events WHERE (all_properties.display_name != '' OR all_properties.description != '') AND is_event_property) AS old_data - ON (events.project_id = old_data.project_id AND property_name = old_data.property_name); - -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.all_cproperties_extractor_mv - TO product_analytics.all_properties AS + ON (events.project_id = old_data.project_id AND property_name = old_data.property_name) +UNION DISTINCT SELECT project_id, property_name, TRUE AS is_event_property, @@ -168,7 +164,7 @@ FROM product_analytics.events WHERE randCanonical() < 0.5 -- This randomly skips inserts AND value != '' LIMIT 2 BY project_id,property_name -UNION ALL +UNION DISTINCT SELECT project_id, property_name, TRUE AS is_event_property, @@ -238,6 +234,16 @@ SELECT project_id, _timestamp FROM product_analytics.events ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name +WHERE length(value) > 0 AND isNull(toFloat64OrNull(value)) + AND _timestamp > now() - INTERVAL 1 MONTH +UNION DISTINCT +SELECT project_id, + `$event_name` AS event_name, + property_name, + JSONExtractString(toString(`properties`), property_name) AS value, + _timestamp +FROM product_analytics.events + ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name WHERE length(value) > 0 AND isNull(toFloat64OrNull(value)) AND _timestamp > now() - INTERVAL 1 MONTH; diff --git a/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql b/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql index 71bd6ad55..a5e39f8ce 100644 --- a/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql +++ b/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql @@ -682,23 +682,20 @@ CREATE TABLE IF NOT EXISTS product_analytics.event_properties ORDER BY (project_id, event_name, property_name, value_type); -- ----------------- This is experimental, if it doesn't work, we need to do it in db worker ------------- --- Incremental materialized view to fill event_properties using $properties +-- Incremental materialized view to fill event_properties using $properties & properties CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_properties_extractor_mv TO product_analytics.event_properties AS SELECT project_id, `$event_name` AS event_name, property_name, - JSONType(JSONExtractRaw(toString(`$properties`), property_name)) AS value_type + toString(JSONType(JSONExtractRaw(toString(`$properties`), property_name))) AS value_type FROM product_analytics.events - ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name; - --- Incremental materialized view to fill event_properties using properties -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_cproperties_extractor - TO product_analytics.event_properties AS + ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name +UNION DISTINCT SELECT project_id, `$event_name` AS event_name, property_name, - JSONType(JSONExtractRaw(toString(`properties`), property_name)) AS value_type + toString(JSONType(JSONExtractRaw(toString(`properties`), property_name))) AS value_type FROM product_analytics.events ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name; -- -------- END --------- @@ -724,7 +721,7 @@ CREATE TABLE IF NOT EXISTS product_analytics.all_properties -- ----------------- This is experimental, if it doesn't work, we need to do it in db worker ------------- --- Incremental materialized view to fill all_properties using $properties +-- Incremental materialized view to fill all_properties using $properties and properties CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.all_properties_extractor_mv TO product_analytics.all_properties AS SELECT project_id, @@ -748,11 +745,8 @@ FROM product_analytics.events WHERE (all_properties.display_name != '' OR all_properties.description != '') AND is_event_property) AS old_data - ON (events.project_id = old_data.project_id AND property_name = old_data.property_name); - --- Incremental materialized view to fill all_properties using properties -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.all_cproperties_extractor_mv - TO product_analytics.all_properties AS + ON (events.project_id = old_data.project_id AND property_name = old_data.property_name) +UNION DISTINCT SELECT project_id, property_name, TRUE AS is_event_property, @@ -802,7 +796,7 @@ FROM product_analytics.events WHERE randCanonical() < 0.5 -- This randomly skips inserts AND value != '' LIMIT 2 BY project_id,property_name -UNION ALL +UNION DISTINCT -- using union because each table should be the target of 1 single refreshable MV SELECT project_id, property_name, @@ -843,7 +837,8 @@ CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_events_grouped ORDER BY (project_id, value) TTL _timestamp + INTERVAL 1 MONTH; -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_events_grouped_mvREFRESHEVERY30MINUTETOproduct_analytics.autocomplete_events_grouped AS +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_events_grouped_mv + REFRESH EVERY 30 MINUTE TO product_analytics.autocomplete_events_grouped AS SELECT project_id, value, count(1) AS data_count, @@ -872,8 +867,17 @@ SELECT project_id, _timestamp FROM product_analytics.events ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name -WHERE length(value) > 0 - AND isNull(toFloat64OrNull(value)) +WHERE length(value) > 0 AND isNull(toFloat64OrNull(value)) + AND _timestamp > now() - INTERVAL 1 MONTH +UNION DISTINCT +SELECT project_id, + `$event_name` AS event_name, + property_name, + JSONExtractString(toString(`properties`), property_name) AS value, + _timestamp +FROM product_analytics.events + ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name +WHERE length(value) > 0 AND isNull(toFloat64OrNull(value)) AND _timestamp > now() - INTERVAL 1 MONTH; @@ -889,7 +893,8 @@ CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_event_properties_group ORDER BY (project_id, event_name, property_name, value) TTL _timestamp + INTERVAL 1 MONTH; -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_event_properties_grouped_mvREFRESHEVERY30MINUTETOproduct_analytics.autocomplete_event_properties_grouped AS +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_event_properties_grouped_mv + REFRESH EVERY 30 MINUTE TO product_analytics.autocomplete_event_properties_grouped AS SELECT project_id, event_name, property_name, diff --git a/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql b/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql index baef5f007..8c3c6475b 100644 --- a/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql +++ b/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql @@ -75,16 +75,14 @@ CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_properties_extrac SELECT project_id, `$event_name` AS event_name, property_name, - JSONType(JSONExtractRaw(toString(`$properties`), property_name)) AS value_type + toString(JSONType(JSONExtractRaw(toString(`$properties`), property_name))) AS value_type FROM product_analytics.events - ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name; - -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_cproperties_extractor - TO product_analytics.event_properties AS + ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name +UNION DISTINCT SELECT project_id, `$event_name` AS event_name, property_name, - JSONType(JSONExtractRaw(toString(`properties`), property_name)) AS value_type + toString(JSONType(JSONExtractRaw(toString(`properties`), property_name))) AS value_type FROM product_analytics.events ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name; @@ -129,10 +127,8 @@ FROM product_analytics.events WHERE (all_properties.display_name != '' OR all_properties.description != '') AND is_event_property) AS old_data - ON (events.project_id = old_data.project_id AND property_name = old_data.property_name); - -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.all_cproperties_extractor_mv - TO product_analytics.all_properties AS + ON (events.project_id = old_data.project_id AND property_name = old_data.property_name) +UNION DISTINCT SELECT project_id, property_name, TRUE AS is_event_property, @@ -179,7 +175,7 @@ FROM product_analytics.events WHERE randCanonical() < 0.5 -- This randomly skips inserts AND value != '' LIMIT 2 BY project_id,property_name -UNION ALL +UNION DISTINCT SELECT project_id, property_name, TRUE AS is_event_property, @@ -250,8 +246,17 @@ SELECT project_id, _timestamp FROM product_analytics.events ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name -WHERE length(value) > 0 - AND isNull(toFloat64OrNull(value)) +WHERE length(value) > 0 AND isNull(toFloat64OrNull(value)) + AND _timestamp > now() - INTERVAL 1 MONTH +UNION DISTINCT +SELECT project_id, + `$event_name` AS event_name, + property_name, + JSONExtractString(toString(`properties`), property_name) AS value, + _timestamp +FROM product_analytics.events + ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name +WHERE length(value) > 0 AND isNull(toFloat64OrNull(value)) AND _timestamp > now() - INTERVAL 1 MONTH; diff --git a/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql b/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql index c8c071d7b..a9d19e6a4 100644 --- a/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql +++ b/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql @@ -222,6 +222,7 @@ CREATE TABLE IF NOT EXISTS experimental.ios_events SET allow_experimental_json_type = 1; +SET enable_json_type = 1; CREATE DATABASE IF NOT EXISTS product_analytics; @@ -578,23 +579,20 @@ CREATE TABLE IF NOT EXISTS product_analytics.event_properties ORDER BY (project_id, event_name, property_name, value_type); -- ----------------- This is experimental, if it doesn't work, we need to do it in db worker ------------- --- Incremental materialized view to fill event_properties using $properties +-- Incremental materialized view to fill event_properties using $properties & properties CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_properties_extractor_mv TO product_analytics.event_properties AS SELECT project_id, `$event_name` AS event_name, property_name, - JSONType(JSONExtractRaw(toString(`$properties`), property_name)) AS value_type + toString(JSONType(JSONExtractRaw(toString(`$properties`), property_name))) AS value_type FROM product_analytics.events - ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name; - --- Incremental materialized view to fill event_properties using properties -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_cproperties_extractor - TO product_analytics.event_properties AS + ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name +UNION DISTINCT SELECT project_id, `$event_name` AS event_name, property_name, - JSONType(JSONExtractRaw(toString(`properties`), property_name)) AS value_type + toString(JSONType(JSONExtractRaw(toString(`properties`), property_name))) AS value_type FROM product_analytics.events ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name; -- -------- END --------- @@ -620,7 +618,7 @@ CREATE TABLE IF NOT EXISTS product_analytics.all_properties -- ----------------- This is experimental, if it doesn't work, we need to do it in db worker ------------- --- Incremental materialized view to fill all_properties using $properties +-- Incremental materialized view to fill all_properties using $properties and properties CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.all_properties_extractor_mv TO product_analytics.all_properties AS SELECT project_id, @@ -644,11 +642,8 @@ FROM product_analytics.events WHERE (all_properties.display_name != '' OR all_properties.description != '') AND is_event_property) AS old_data - ON (events.project_id = old_data.project_id AND property_name = old_data.property_name); - --- Incremental materialized view to fill all_properties using properties -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.all_cproperties_extractor_mv - TO product_analytics.all_properties AS + ON (events.project_id = old_data.project_id AND property_name = old_data.property_name) +UNION DISTINCT SELECT project_id, property_name, TRUE AS is_event_property, @@ -698,7 +693,7 @@ FROM product_analytics.events WHERE randCanonical() < 0.5 -- This randomly skips inserts AND value != '' LIMIT 2 BY project_id,property_name -UNION ALL +UNION DISTINCT -- using union because each table should be the target of 1 single refreshable MV SELECT project_id, property_name, @@ -769,6 +764,16 @@ SELECT project_id, _timestamp FROM product_analytics.events ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name +WHERE length(value) > 0 AND isNull(toFloat64OrNull(value)) + AND _timestamp > now() - INTERVAL 1 MONTH +UNION DISTINCT +SELECT project_id, + `$event_name` AS event_name, + property_name, + JSONExtractString(toString(`properties`), property_name) AS value, + _timestamp +FROM product_analytics.events + ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name WHERE length(value) > 0 AND isNull(toFloat64OrNull(value)) AND _timestamp > now() - INTERVAL 1 MONTH; @@ -797,4 +802,3 @@ FROM product_analytics.autocomplete_event_properties WHERE length(value) > 0 AND autocomplete_event_properties._timestamp > now() - INTERVAL 1 MONTH GROUP BY project_id, event_name, property_name, value; -