From defaaf0c308a43ae3f65e8556e70aa87124b4242 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Thu, 8 May 2025 18:51:44 +0200 Subject: [PATCH] refactor(chalice): autocomplete for event-names refactor(chalice): autocomplete for properties-names refactor(chalice): autocomplete for properties-values --- .../core/product_analytics/autocomplete.py | 57 ++++++++++++ .../core/product_analytics/events.py | 4 +- .../core/sessions/sessions_search_ch.py | 4 +- .../core/sessions/sessions_search_pg.py | 10 +- api/routers/subs/product_analytics.py | 20 +++- .../db/init_dbs/clickhouse/1.23.0/1.23.0.sql | 89 ++++++++++++++++++ .../clickhouse/create/init_schema.sql | 92 +++++++++++++++++- .../db/init_dbs/clickhouse/1.23.0/1.23.0.sql | 93 ++++++++++++++++++- .../clickhouse/create/init_schema.sql | 92 +++++++++++++++++- 9 files changed, 448 insertions(+), 13 deletions(-) create mode 100644 api/chalicelib/core/product_analytics/autocomplete.py diff --git a/api/chalicelib/core/product_analytics/autocomplete.py b/api/chalicelib/core/product_analytics/autocomplete.py new file mode 100644 index 000000000..5915a8ab6 --- /dev/null +++ b/api/chalicelib/core/product_analytics/autocomplete.py @@ -0,0 +1,57 @@ +from typing import Optional + +from chalicelib.utils import helper +from chalicelib.utils.ch_client import ClickHouseClient + + +def search_events(project_id: int, q: Optional[str] = None): + with ClickHouseClient() as ch_client: + full_args = {"project_id": project_id, "limit": 20} + + constraints = ["project_id = %(project_id)s", + "_timestamp >= now()-INTERVAL 1 MONTH"] + if q: + constraints += ["value ILIKE %(q)s"] + full_args["q"] = helper.string_to_sql_like(q) + query = ch_client.format( + f"""SELECT value,data_count + FROM product_analytics.autocomplete_events_grouped + WHERE {" AND ".join(constraints)} + ORDER BY data_count DESC + LIMIT %(limit)s;""", + parameters=full_args) + rows = ch_client.execute(query) + + return {"values": helper.list_to_camel_case(rows), "_src": 2} + + +def search_properties(project_id: int, property_name: Optional[str] = None, event_name: Optional[str] = None, + q: Optional[str] = None): + with ClickHouseClient() as ch_client: + select = "value" + full_args = {"project_id": project_id, "limit": 20, + "event_name": event_name, "property_name": property_name} + + constraints = ["project_id = %(project_id)s", + "_timestamp >= now()-INTERVAL 1 MONTH"] + if event_name: + constraints += ["event_name = %(event_name)s"] + if property_name and q: + constraints += ["property_name = %(property_name)s"] + elif property_name: + select = "DISTINCT ON(property_name) property_name AS value" + constraints += ["property_name ILIKE %(property_name)s"] + full_args["property_name"] = helper.string_to_sql_like(property_name) + if q: + constraints += ["value ILIKE %(q)s"] + full_args["q"] = helper.string_to_sql_like(q) + query = ch_client.format( + f"""SELECT {select},data_count + FROM product_analytics.autocomplete_event_properties_grouped + WHERE {" AND ".join(constraints)} + ORDER BY data_count DESC + LIMIT %(limit)s;""", + parameters=full_args) + rows = ch_client.execute(query) + + return {"values": helper.list_to_camel_case(rows), "_src": 2} diff --git a/api/chalicelib/core/product_analytics/events.py b/api/chalicelib/core/product_analytics/events.py index f902d91d5..10e578c7d 100644 --- a/api/chalicelib/core/product_analytics/events.py +++ b/api/chalicelib/core/product_analytics/events.py @@ -148,11 +148,11 @@ def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema): parameters=full_args) rows = ch_client.execute(query) if len(rows) == 0: - return {"total": 0, "rows": [], "src": 2} + return {"total": 0, "rows": [], "_src": 2} total = rows[0]["total"] for r in rows: r.pop("total") - return {"total": total, "rows": rows, "src": 2} + return {"total": total, "rows": rows, "_src": 2} def get_lexicon(project_id: int, page: schemas.PaginatedSchema): diff --git a/api/chalicelib/core/sessions/sessions_search_ch.py b/api/chalicelib/core/sessions/sessions_search_ch.py index 38ada500d..c0142bae4 100644 --- a/api/chalicelib/core/sessions/sessions_search_ch.py +++ b/api/chalicelib/core/sessions/sessions_search_ch.py @@ -73,7 +73,7 @@ def search_sessions(data: schemas.SessionsSearchPayloadSchema, project: schemas. return { 'total': 0, 'sessions': [], - 'src': 2 + '_src': 2 } if project.platform == "web": full_args, query_part = sessions.search_query_parts_ch(data=data, error_status=error_status, @@ -216,7 +216,7 @@ def search_sessions(data: schemas.SessionsSearchPayloadSchema, project: schemas. return { 'total': total, 'sessions': sessions_list, - 'src': 2 + '_src': 2 } diff --git a/api/chalicelib/core/sessions/sessions_search_pg.py b/api/chalicelib/core/sessions/sessions_search_pg.py index f28af757a..9036e2686 100644 --- a/api/chalicelib/core/sessions/sessions_search_pg.py +++ b/api/chalicelib/core/sessions/sessions_search_pg.py @@ -49,7 +49,7 @@ def search_sessions(data: schemas.SessionsSearchPayloadSchema, project: schemas. return { 'total': 0, 'sessions': [], - 'src': 1 + '_src': 1 } full_args, query_part = sessions_legacy.search_query_parts(data=data, error_status=error_status, errors_only=errors_only, @@ -177,7 +177,7 @@ def search_sessions(data: schemas.SessionsSearchPayloadSchema, project: schemas. return { 'total': total, 'sessions': helper.list_to_camel_case(sessions), - 'src': 1 + '_src': 1 } @@ -240,7 +240,7 @@ def search_by_metadata(tenant_id, user_id, m_key, m_value, project_id=None): cur.execute("\nUNION\n".join(sub_queries)) rows = cur.fetchall() for i in rows: - i["src"] = 1 + i["_src"] = 1 results[str(i["project_id"])]["sessions"].append(helper.dict_to_camel_case(i)) return results @@ -248,7 +248,7 @@ def search_by_metadata(tenant_id, user_id, m_key, m_value, project_id=None): def search_sessions_by_ids(project_id: int, session_ids: list, sort_by: str = 'session_id', ascending: bool = False) -> dict: if session_ids is None or len(session_ids) == 0: - return {"total": 0, "sessions": [], "src": 1} + return {"total": 0, "sessions": [], "_src": 1} with pg_client.PostgresClient() as cur: meta_keys = metadata.get(project_id=project_id) params = {"project_id": project_id, "session_ids": tuple(session_ids)} @@ -267,4 +267,4 @@ def search_sessions_by_ids(project_id: int, session_ids: list, sort_by: str = 's s["metadata"] = {} for m in meta_keys: s["metadata"][m["key"]] = s.pop(f'metadata_{m["index"]}') - return {"total": len(rows), "sessions": helper.list_to_camel_case(rows), "src": 1} + return {"total": len(rows), "sessions": helper.list_to_camel_case(rows), "_src": 1} diff --git a/api/routers/subs/product_analytics.py b/api/routers/subs/product_analytics.py index 5b18ca93e..d7dbcba23 100644 --- a/api/routers/subs/product_analytics.py +++ b/api/routers/subs/product_analytics.py @@ -4,9 +4,10 @@ from fastapi import Body, Depends, Query import schemas from chalicelib.core import metadata -from chalicelib.core.product_analytics import events, properties +from chalicelib.core.product_analytics import events, properties, autocomplete from or_dependencies import OR_context from routers.base import get_routers +from typing import Optional public_app, app, app_apikey = get_routers() @@ -53,3 +54,20 @@ def get_all_lexicon_events(projectId: int, filter_query: Annotated[schemas.Pagin def get_all_lexicon_properties(projectId: int, filter_query: Annotated[schemas.PaginatedSchema, Query()], context: schemas.CurrentContext = Depends(OR_context)): return {"data": properties.get_lexicon(project_id=projectId, page=filter_query)} + + +@app.get('/{projectId}/events/autocomplete', tags=["autocomplete"]) +def autocomplete_events(projectId: int, q: Optional[str] = None, + context: schemas.CurrentContext = Depends(OR_context)): + return {"data": autocomplete.search_events(project_id=projectId, q=None if not q or len(q) == 0 else q)} + + +@app.get('/{projectId}/properties/autocomplete', tags=["autocomplete"]) +def autocomplete_properties(projectId: int, propertyName: str, eventName: Optional[str] = None, + q: Optional[str] = None, context: schemas.CurrentContext = Depends(OR_context)): + return {"data": autocomplete.search_properties(project_id=projectId, + event_name=None if not eventName \ + or len(eventName) == 0 else eventName, + property_name=None if not propertyName \ + or len(propertyName) == 0 else propertyName, + q=None if not q or len(q) == 0 else q)} diff --git a/ee/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql b/ee/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql index 889bb4d49..953c86662 100644 --- a/ee/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql +++ b/ee/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql @@ -165,3 +165,92 @@ FROM product_analytics.events WHERE randCanonical() < 0.5 -- This randomly skips inserts AND value != '' LIMIT 2 BY project_id,property_name; + +-- Autocomplete + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_events +( + project_id UInt16, + value String COMMENT 'The $event_name', + _timestamp DateTime +) ENGINE = MergeTree() + ORDER BY (project_id, value, _timestamp) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_events_mv + TO product_analytics.autocomplete_events AS +SELECT project_id, + `$event_name` AS value, + _timestamp +FROM product_analytics.events +WHERE _timestamp > now() - INTERVAL 1 MONTH; + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_events_grouped +( + project_id UInt16, + value String COMMENT 'The $event_name', + data_count UInt16 COMMENT 'The number of appearance during the past month', + _timestamp DateTime +) ENGINE = ReplacingMergeTree(_timestamp) + ORDER BY (project_id, value) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_events_grouped_mv + REFRESH EVERY 30 MINUTE TO product_analytics.autocomplete_events_grouped AS +SELECT project_id, + value, + count(1) AS data_count, + max(_timestamp) AS _timestamp +FROM product_analytics.autocomplete_events +WHERE autocomplete_events._timestamp > now() - INTERVAL 1 MONTH +GROUP BY project_id, value; + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_event_properties +( + project_id UInt16, + event_name String COMMENT 'The $event_name', + property_name String, + value String COMMENT 'The property-value as a string', + _timestamp DateTime DEFAULT now() +) ENGINE = MergeTree() + ORDER BY (project_id, event_name, property_name, value, _timestamp) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_event_properties_mv + TO product_analytics.autocomplete_event_properties AS +SELECT project_id, + `$event_name` AS event_name, + property_name, + JSONExtractString(toString(`$properties`), property_name) AS value, + _timestamp +FROM product_analytics.events + ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name +WHERE length(value) > 0 AND isNull(toFloat64OrNull(value)) + AND _timestamp > now() - INTERVAL 1 MONTH; + + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_event_properties_grouped +( + project_id UInt16, + event_name String COMMENT 'The $event_name', + property_name String, + value String COMMENT 'The property-value as a string', + data_count UInt16 COMMENT 'The number of appearance during the past month', + _timestamp DateTime DEFAULT now() +) ENGINE = ReplacingMergeTree(_timestamp) + ORDER BY (project_id, event_name, property_name, value) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_event_properties_grouped_mv + REFRESH EVERY 30 MINUTE TO product_analytics.autocomplete_event_properties_grouped AS +SELECT project_id, + event_name, + property_name, + value, + count(1) AS data_count, + max(_timestamp) AS _timestamp +FROM product_analytics.autocomplete_event_properties +WHERE length(value) > 0 + AND autocomplete_event_properties._timestamp > now() - INTERVAL 1 MONTH +GROUP BY project_id, event_name, property_name, value; + diff --git a/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql b/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql index f1c2fbb66..6c32c70c6 100644 --- a/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql +++ b/ee/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql @@ -791,7 +791,8 @@ CREATE TABLE IF NOT EXISTS product_analytics.property_values_samples ENGINE = ReplacingMergeTree(_timestamp) ORDER BY (project_id, property_name, is_event_property); -- Incremental materialized view to get random examples of property values using $properties & properties -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mvREFRESHEVERY30HOURTOproduct_analytics.property_values_samples AS +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mv + REFRESH EVERY 30 HOUR TO product_analytics.property_values_samples AS SELECT project_id, property_name, TRUE AS is_event_property, @@ -812,3 +813,92 @@ FROM product_analytics.events WHERE randCanonical() < 0.5 -- This randomly skips inserts AND value != '' LIMIT 2 BY project_id,property_name; + +-- Autocomplete + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_events +( + project_id UInt16, + value String COMMENT 'The $event_name', + _timestamp DateTime +) ENGINE = MergeTree() + ORDER BY (project_id, value, _timestamp) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_events_mv + TO product_analytics.autocomplete_events AS +SELECT project_id, + `$event_name` AS value, + _timestamp +FROM product_analytics.events +WHERE _timestamp > now() - INTERVAL 1 MONTH; + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_events_grouped +( + project_id UInt16, + value String COMMENT 'The $event_name', + data_count UInt16 COMMENT 'The number of appearance during the past month', + _timestamp DateTime +) ENGINE = ReplacingMergeTree(_timestamp) + ORDER BY (project_id, value) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_events_grouped_mv + REFRESH EVERY 30 MINUTE TO product_analytics.autocomplete_events_grouped AS +SELECT project_id, + value, + count(1) AS data_count, + max(_timestamp) AS _timestamp +FROM product_analytics.autocomplete_events +WHERE autocomplete_events._timestamp > now() - INTERVAL 1 MONTH +GROUP BY project_id, value; + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_event_properties +( + project_id UInt16, + event_name String COMMENT 'The $event_name', + property_name String, + value String COMMENT 'The property-value as a string', + _timestamp DateTime DEFAULT now() +) ENGINE = MergeTree() + ORDER BY (project_id, event_name, property_name, value, _timestamp) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_event_properties_mv + TO product_analytics.autocomplete_event_properties AS +SELECT project_id, + `$event_name` AS event_name, + property_name, + JSONExtractString(toString(`$properties`), property_name) AS value, + _timestamp +FROM product_analytics.events + ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name +WHERE length(value) > 0 AND isNull(toFloat64OrNull(value)) + AND _timestamp > now() - INTERVAL 1 MONTH; + + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_event_properties_grouped +( + project_id UInt16, + event_name String COMMENT 'The $event_name', + property_name String, + value String COMMENT 'The property-value as a string', + data_count UInt16 COMMENT 'The number of appearance during the past month', + _timestamp DateTime DEFAULT now() +) ENGINE = ReplacingMergeTree(_timestamp) + ORDER BY (project_id, event_name, property_name, value) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_event_properties_grouped_mv + REFRESH EVERY 30 MINUTE TO product_analytics.autocomplete_event_properties_grouped AS +SELECT project_id, + event_name, + property_name, + value, + count(1) AS data_count, + max(_timestamp) AS _timestamp +FROM product_analytics.autocomplete_event_properties +WHERE length(value) > 0 + AND autocomplete_event_properties._timestamp > now() - INTERVAL 1 MONTH +GROUP BY project_id, event_name, property_name, value; + diff --git a/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql b/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql index cebb68586..d8472807d 100644 --- a/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql +++ b/scripts/schema/db/init_dbs/clickhouse/1.23.0/1.23.0.sql @@ -155,7 +155,8 @@ CREATE TABLE IF NOT EXISTS product_analytics.property_values_samples ENGINE = ReplacingMergeTree(_timestamp) ORDER BY (project_id, property_name, is_event_property); -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mvREFRESHEVERY30HOURTOproduct_analytics.property_values_samples AS +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mv + REFRESH EVERY 30 HOUR TO product_analytics.property_values_samples AS SELECT project_id, property_name, TRUE AS is_event_property, @@ -175,3 +176,93 @@ FROM product_analytics.events WHERE randCanonical() < 0.5 -- This randomly skips inserts AND value != '' LIMIT 2 BY project_id,property_name; + + +-- Autocomplete + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_events +( + project_id UInt16, + value String COMMENT 'The $event_name', + _timestamp DateTime +) ENGINE = MergeTree() + ORDER BY (project_id, value, _timestamp) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_events_mv + TO product_analytics.autocomplete_events AS +SELECT project_id, + `$event_name` AS value, + _timestamp +FROM product_analytics.events +WHERE _timestamp > now() - INTERVAL 1 MONTH; + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_events_grouped +( + project_id UInt16, + value String COMMENT 'The $event_name', + data_count UInt16 COMMENT 'The number of appearance during the past month', + _timestamp DateTime +) ENGINE = ReplacingMergeTree(_timestamp) + ORDER BY (project_id, value) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_events_grouped_mv + REFRESH EVERY 30 MINUTE TO product_analytics.autocomplete_events_grouped AS +SELECT project_id, + value, + count(1) AS data_count, + max(_timestamp) AS _timestamp +FROM product_analytics.autocomplete_events +WHERE autocomplete_events._timestamp > now() - INTERVAL 1 MONTH +GROUP BY project_id, value; + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_event_properties +( + project_id UInt16, + event_name String COMMENT 'The $event_name', + property_name String, + value String COMMENT 'The property-value as a string', + _timestamp DateTime DEFAULT now() +) ENGINE = MergeTree() + ORDER BY (project_id, event_name, property_name, value, _timestamp) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_event_properties_mv + TO product_analytics.autocomplete_event_properties AS +SELECT project_id, + `$event_name` AS event_name, + property_name, + JSONExtractString(toString(`$properties`), property_name) AS value, + _timestamp +FROM product_analytics.events + ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name +WHERE length(value) > 0 AND isNull(toFloat64OrNull(value)) + AND _timestamp > now() - INTERVAL 1 MONTH; + + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_event_properties_grouped +( + project_id UInt16, + event_name String COMMENT 'The $event_name', + property_name String, + value String COMMENT 'The property-value as a string', + data_count UInt16 COMMENT 'The number of appearance during the past month', + _timestamp DateTime DEFAULT now() +) ENGINE = ReplacingMergeTree(_timestamp) + ORDER BY (project_id, event_name, property_name, value) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_event_properties_grouped_mv + REFRESH EVERY 30 MINUTE TO product_analytics.autocomplete_event_properties_grouped AS +SELECT project_id, + event_name, + property_name, + value, + count(1) AS data_count, + max(_timestamp) AS _timestamp +FROM product_analytics.autocomplete_event_properties +WHERE length(value) > 0 + AND autocomplete_event_properties._timestamp > now() - INTERVAL 1 MONTH +GROUP BY project_id, event_name, property_name, value; + diff --git a/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql b/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql index 1bd5d0def..b9cb4b173 100644 --- a/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql +++ b/scripts/schema/db/init_dbs/clickhouse/create/init_schema.sql @@ -687,7 +687,8 @@ CREATE TABLE IF NOT EXISTS product_analytics.property_values_samples ENGINE = ReplacingMergeTree(_timestamp) ORDER BY (project_id, property_name, is_event_property); -- Incremental materialized view to get random examples of property values using $properties & properties -CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mvREFRESHEVERY30HOURTOproduct_analytics.property_values_samples AS +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.property_values_sampler_mv + REFRESH EVERY 30 HOUR TO product_analytics.property_values_samples AS SELECT project_id, property_name, TRUE AS is_event_property, @@ -708,3 +709,92 @@ FROM product_analytics.events WHERE randCanonical() < 0.5 -- This randomly skips inserts AND value != '' LIMIT 2 BY project_id,property_name; + +-- Autocomplete + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_events +( + project_id UInt16, + value String COMMENT 'The $event_name', + _timestamp DateTime +) ENGINE = MergeTree() + ORDER BY (project_id, value, _timestamp) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_events_mv + TO product_analytics.autocomplete_events AS +SELECT project_id, + `$event_name` AS value, + _timestamp +FROM product_analytics.events +WHERE _timestamp > now() - INTERVAL 1 MONTH; + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_events_grouped +( + project_id UInt16, + value String COMMENT 'The $event_name', + data_count UInt16 COMMENT 'The number of appearance during the past month', + _timestamp DateTime +) ENGINE = ReplacingMergeTree(_timestamp) + ORDER BY (project_id, value) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_events_grouped_mv + REFRESH EVERY 30 MINUTE TO product_analytics.autocomplete_events_grouped AS +SELECT project_id, + value, + count(1) AS data_count, + max(_timestamp) AS _timestamp +FROM product_analytics.autocomplete_events +WHERE autocomplete_events._timestamp > now() - INTERVAL 1 MONTH +GROUP BY project_id, value; + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_event_properties +( + project_id UInt16, + event_name String COMMENT 'The $event_name', + property_name String, + value String COMMENT 'The property-value as a string', + _timestamp DateTime DEFAULT now() +) ENGINE = MergeTree() + ORDER BY (project_id, event_name, property_name, value, _timestamp) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_event_properties_mv + TO product_analytics.autocomplete_event_properties AS +SELECT project_id, + `$event_name` AS event_name, + property_name, + JSONExtractString(toString(`$properties`), property_name) AS value, + _timestamp +FROM product_analytics.events + ARRAY JOIN JSONExtractKeys(toString(`$properties`)) as property_name +WHERE length(value) > 0 AND isNull(toFloat64OrNull(value)) + AND _timestamp > now() - INTERVAL 1 MONTH; + + +CREATE TABLE IF NOT EXISTS product_analytics.autocomplete_event_properties_grouped +( + project_id UInt16, + event_name String COMMENT 'The $event_name', + property_name String, + value String COMMENT 'The property-value as a string', + data_count UInt16 COMMENT 'The number of appearance during the past month', + _timestamp DateTime DEFAULT now() +) ENGINE = ReplacingMergeTree(_timestamp) + ORDER BY (project_id, event_name, property_name, value) + TTL _timestamp + INTERVAL 1 MONTH; + +CREATE MATERIALIZED VIEW IF NOT EXISTS product_analytics.autocomplete_event_properties_grouped_mv + REFRESH EVERY 30 MINUTE TO product_analytics.autocomplete_event_properties_grouped AS +SELECT project_id, + event_name, + property_name, + value, + count(1) AS data_count, + max(_timestamp) AS _timestamp +FROM product_analytics.autocomplete_event_properties +WHERE length(value) > 0 + AND autocomplete_event_properties._timestamp > now() - INTERVAL 1 MONTH +GROUP BY project_id, event_name, property_name, value; +