refactor(DB): track auto-captured properties

This commit is contained in:
Taha Yassine Kraiem 2025-06-02 15:35:40 +02:00
parent fd8b24cbd6
commit 32dedb9c15
4 changed files with 24 additions and 12 deletions

View file

@ -54,24 +54,27 @@ CREATE TABLE IF NOT EXISTS product_analytics.event_properties
event_name String,
property_name String,
value_type String,
auto_captured BOOL,
_timestamp DateTime DEFAULT now()
) ENGINE = ReplacingMergeTree(_timestamp)
ORDER BY (project_id, event_name, property_name, value_type);
ORDER BY (project_id, event_name, property_name, value_type, auto_captured);
CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_properties_extractor_mv
TO product_analytics.event_properties AS
SELECT project_id,
`$event_name` AS event_name,
property_name,
toString(JSONType(JSONExtractRaw(toString(`$properties`), property_name))) AS value_type
toString(JSONType(JSONExtractRaw(toString(`$properties`), property_name))) AS value_type,
`$auto_captured` AS auto_captured
FROM product_analytics.events
ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name
UNION DISTINCT
SELECT project_id,
`$event_name` AS event_name,
property_name,
toString(JSONType(JSONExtractRaw(toString(`properties`), property_name))) AS value_type
toString(JSONType(JSONExtractRaw(toString(`properties`), property_name))) AS value_type,
`$auto_captured` AS auto_captured
FROM product_analytics.events
ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name;

View file

@ -676,10 +676,11 @@ CREATE TABLE IF NOT EXISTS product_analytics.event_properties
event_name String,
property_name String,
value_type String,
auto_captured BOOL,
_timestamp DateTime DEFAULT now()
) ENGINE = ReplacingMergeTree(_timestamp)
ORDER BY (project_id, event_name, property_name, value_type);
ORDER BY (project_id, event_name, property_name, value_type, auto_captured);
-- ----------------- This is experimental, if it doesn't work, we need to do it in db worker -------------
-- Incremental materialized view to fill event_properties using $properties & properties
@ -688,14 +689,16 @@ CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_properties_extrac
SELECT project_id,
`$event_name` AS event_name,
property_name,
toString(JSONType(JSONExtractRaw(toString(`$properties`), property_name))) AS value_type
toString(JSONType(JSONExtractRaw(toString(`$properties`), property_name))) AS value_type,
`$auto_captured` AS auto_captured
FROM product_analytics.events
ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name
UNION DISTINCT
SELECT project_id,
`$event_name` AS event_name,
property_name,
toString(JSONType(JSONExtractRaw(toString(`properties`), property_name))) AS value_type
toString(JSONType(JSONExtractRaw(toString(`properties`), property_name))) AS value_type,
`$auto_captured` AS auto_captured
FROM product_analytics.events
ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name;
-- -------- END ---------

View file

@ -65,24 +65,27 @@ CREATE TABLE IF NOT EXISTS product_analytics.event_properties
event_name String,
property_name String,
value_type String,
auto_captured BOOL,
_timestamp DateTime DEFAULT now()
) ENGINE = ReplacingMergeTree(_timestamp)
ORDER BY (project_id, event_name, property_name, value_type);
ORDER BY (project_id, event_name, property_name, value_type, auto_captured);
CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_properties_extractor_mv
TO product_analytics.event_properties AS
SELECT project_id,
`$event_name` AS event_name,
property_name,
toString(JSONType(JSONExtractRaw(toString(`$properties`), property_name))) AS value_type
toString(JSONType(JSONExtractRaw(toString(`$properties`), property_name))) AS value_type,
`$auto_captured` AS auto_captured
FROM product_analytics.events
ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name
UNION DISTINCT
SELECT project_id,
`$event_name` AS event_name,
property_name,
toString(JSONType(JSONExtractRaw(toString(`properties`), property_name))) AS value_type
toString(JSONType(JSONExtractRaw(toString(`properties`), property_name))) AS value_type,
`$auto_captured` AS auto_captured
FROM product_analytics.events
ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name;

View file

@ -573,10 +573,11 @@ CREATE TABLE IF NOT EXISTS product_analytics.event_properties
event_name String,
property_name String,
value_type String,
auto_captured BOOL,
_timestamp DateTime DEFAULT now()
) ENGINE = ReplacingMergeTree(_timestamp)
ORDER BY (project_id, event_name, property_name, value_type);
ORDER BY (project_id, event_name, property_name, value_type, auto_captured);
-- ----------------- This is experimental, if it doesn't work, we need to do it in db worker -------------
-- Incremental materialized view to fill event_properties using $properties & properties
@ -585,14 +586,16 @@ CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.event_properties_extrac
SELECT project_id,
`$event_name` AS event_name,
property_name,
toString(JSONType(JSONExtractRaw(toString(`$properties`), property_name))) AS value_type
toString(JSONType(JSONExtractRaw(toString(`$properties`), property_name))) AS value_type,
`$auto_captured` AS auto_captured
FROM product_analytics.events
ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name
UNION DISTINCT
SELECT project_id,
`$event_name` AS event_name,
property_name,
toString(JSONType(JSONExtractRaw(toString(`properties`), property_name))) AS value_type
toString(JSONType(JSONExtractRaw(toString(`properties`), property_name))) AS value_type,
`$auto_captured` AS auto_captured
FROM product_analytics.events
ARRAY JOIN JSONExtractKeys(toString(`properties`)) as property_name;
-- -------- END ---------