feat(DB): use incremental&refreshable materialized views to fill extra tables
This commit is contained in:
parent
d087736df0
commit
bb17f672fe
1 changed files with 113 additions and 38 deletions
|
|
@ -143,6 +143,17 @@ CREATE TABLE IF NOT EXISTS experimental.sessions
|
|||
TTL datetime + INTERVAL 1 MONTH
|
||||
SETTINGS index_granularity = 512;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS experimental.user_viewed_sessions
|
||||
(
|
||||
project_id UInt16,
|
||||
user_id UInt32,
|
||||
session_id UInt64,
|
||||
_timestamp DateTime DEFAULT now()
|
||||
) ENGINE = ReplacingMergeTree(_timestamp)
|
||||
PARTITION BY toYYYYMM(_timestamp)
|
||||
ORDER BY (project_id, user_id, session_id)
|
||||
TTL _timestamp + INTERVAL 3 MONTH;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS experimental.issues
|
||||
(
|
||||
project_id UInt16,
|
||||
|
|
@ -518,8 +529,9 @@ CREATE TABLE IF NOT EXISTS product_analytics.all_events
|
|||
ORDER BY (project_id, auto_captured, event_name);
|
||||
|
||||
-- ----------------- This is experimental, if it doesn't work, we need to do it in db worker -------------
|
||||
-- Incremental materialized view to fill event_properties using $properties
|
||||
CREATE MATERIALIZED VIEW product_analytics.events_extractor TO product_analytics.all_events AS
|
||||
-- Incremental materialized view to fill all_events using $properties
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.all_events_extractor_mv
|
||||
TO product_analytics.all_events AS
|
||||
SELECT DISTINCT ON (project_id,auto_captured,event_name) project_id,
|
||||
`$auto_captured` AS auto_captured,
|
||||
`$event_name` AS event_name,
|
||||
|
|
@ -545,8 +557,7 @@ CREATE TABLE IF NOT EXISTS product_analytics.event_properties
|
|||
project_id UInt16,
|
||||
event_name String,
|
||||
property_name String,
|
||||
-- TODO: find a fix for this
|
||||
-- value_type String,
|
||||
value_type String,
|
||||
|
||||
_timestamp DateTime DEFAULT now()
|
||||
) ENGINE = ReplacingMergeTree(_timestamp)
|
||||
|
|
@ -554,18 +565,22 @@ CREATE TABLE IF NOT EXISTS product_analytics.event_properties
|
|||
|
||||
-- ----------------- This is experimental, if it doesn't work, we need to do it in db worker -------------
|
||||
-- Incremental materialized view to fill event_properties using $properties
|
||||
CREATE MATERIALIZED VIEW product_analytics.event_properties_extractor TO product_analytics.event_properties AS
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_properties_extractor_mv
|
||||
TO product_analytics.event_properties AS
|
||||
SELECT project_id,
|
||||
`$event_name` AS event_name,
|
||||
property_name
|
||||
property_name,
|
||||
JSONType(JSONExtractRaw(toString(`$properties`), property_name)) AS value_type
|
||||
FROM product_analytics.events
|
||||
ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name;
|
||||
|
||||
-- Incremental materialized view to fill event_properties using properties
|
||||
CREATE MATERIALIZED VIEW product_analytics.event_cproperties_extractor TO product_analytics.event_properties AS
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_cproperties_extractor
|
||||
TO product_analytics.event_properties AS
|
||||
SELECT project_id,
|
||||
`$event_name` AS event_name,
|
||||
property_name
|
||||
property_name,
|
||||
JSONType(JSONExtractRaw(toString(`properties`), property_name)) AS value_type
|
||||
FROM product_analytics.events
|
||||
ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name;
|
||||
-- -------- END ---------
|
||||
|
|
@ -590,32 +605,92 @@ CREATE TABLE IF NOT EXISTS product_analytics.all_properties
|
|||
ORDER BY (project_id, property_name, is_event_property);
|
||||
|
||||
|
||||
CREATE TABLE IF NOT EXISTS experimental.user_viewed_sessions
|
||||
-- ----------------- This is experimental, if it doesn't work, we need to do it in db worker -------------
|
||||
-- Incremental materialized view to fill all_properties using $properties
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.all_properties_extractor_mv
|
||||
TO product_analytics.all_properties AS
|
||||
SELECT project_id,
|
||||
property_name,
|
||||
TRUE AS is_event_property,
|
||||
display_name,
|
||||
description,
|
||||
status,
|
||||
data_count,
|
||||
query_count
|
||||
FROM product_analytics.events
|
||||
ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name
|
||||
LEFT JOIN (SELECT project_id,
|
||||
property_name,
|
||||
display_name,
|
||||
description,
|
||||
status,
|
||||
data_count,
|
||||
query_count
|
||||
FROM product_analytics.all_properties
|
||||
WHERE (all_properties.display_name != ''
|
||||
OR all_properties.description != '')
|
||||
AND is_event_property) AS old_data
|
||||
ON (events.project_id = old_data.project_id AND property_name = old_data.property_name);
|
||||
|
||||
-- Incremental materialized view to fill all_properties using properties
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.all_cproperties_extractor_mv
|
||||
TO product_analytics.all_properties AS
|
||||
SELECT project_id,
|
||||
property_name,
|
||||
TRUE AS is_event_property,
|
||||
display_name,
|
||||
description,
|
||||
status,
|
||||
data_count,
|
||||
query_count
|
||||
FROM product_analytics.events
|
||||
ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name
|
||||
LEFT JOIN (SELECT project_id,
|
||||
property_name,
|
||||
display_name,
|
||||
description,
|
||||
status,
|
||||
data_count,
|
||||
query_count
|
||||
FROM product_analytics.all_properties
|
||||
WHERE (all_properties.display_name != ''
|
||||
OR all_properties.description != '')
|
||||
AND is_event_property) AS old_data
|
||||
ON (events.project_id = old_data.project_id AND property_name = old_data.property_name);
|
||||
-- -------- END ---------
|
||||
|
||||
-- Some random examples of property-values, limited by 2 per property
|
||||
-- Experimental: This table is filled by a refreshable materialized view
|
||||
CREATE TABLE IF NOT EXISTS product_analytics.property_values_samples
|
||||
(
|
||||
project_id UInt16,
|
||||
user_id UInt32,
|
||||
session_id UInt64,
|
||||
_timestamp DateTime DEFAULT now()
|
||||
) ENGINE = ReplacingMergeTree(_timestamp)
|
||||
PARTITION BY toYYYYMM(_timestamp)
|
||||
ORDER BY (project_id, user_id, session_id)
|
||||
TTL _timestamp + INTERVAL 3 MONTH;
|
||||
|
||||
CREATE TABLE product_analytics.properties_example_values
|
||||
(
|
||||
id UInt64,
|
||||
property_name String,
|
||||
is_event_property BOOL,
|
||||
value String,
|
||||
random_value Float64
|
||||
)
|
||||
ENGINE = ReplacingMergeTree()
|
||||
ORDER BY random_value;
|
||||
|
||||
-- Incremental materialized view to get random examples of property values
|
||||
CREATE MATERIALIZED VIEW product_analytics.properties_values_random_sampler TO product_analytics.properties_example_values AS
|
||||
SELECT id,
|
||||
value,
|
||||
rand() AS random_value
|
||||
_timestamp DateTime DEFAULT now()
|
||||
)
|
||||
ENGINE = ReplacingMergeTree(_timestamp)
|
||||
ORDER BY (project_id, property_name, is_event_property);
|
||||
-- Incremental materialized view to get random examples of property values using $properties & properties
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mvREFRESHEVERY30HOURTOproduct_analytics.property_values_samples AS
|
||||
SELECT project_id,
|
||||
property_name,
|
||||
TRUE AS is_event_property,
|
||||
JSONExtractString(toString(`$properties`), property_name) AS value
|
||||
FROM product_analytics.events
|
||||
WHERE rand() < 0.5 -- This randomly skips ~50% of inserts
|
||||
ORDER BY random_value
|
||||
LIMIT 10;
|
||||
ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name
|
||||
WHERE randCanonical() < 0.5 -- This randomly skips inserts
|
||||
AND value != ''
|
||||
LIMIT 2 BY project_id,property_name
|
||||
UNION ALL
|
||||
-- using union because each table should be the target of 1 single refreshable MV
|
||||
SELECT project_id,
|
||||
property_name,
|
||||
TRUE AS is_event_property,
|
||||
JSONExtractString(toString(`properties`), property_name) AS value
|
||||
FROM product_analytics.events
|
||||
ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name
|
||||
WHERE randCanonical() < 0.5 -- This randomly skips inserts
|
||||
AND value != ''
|
||||
LIMIT 2 BY project_id,property_name;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue