From 20a57d7ca160140ca12ecf80a09bf6a302e26a37 Mon Sep 17 00:00:00 2001
From: Taha Yassine Kraiem
Date: Wed, 26 Mar 2025 13:27:06 +0100
Subject: [PATCH] feat(chalice): initial lexicon for events & properties

---
Reviewer notes (this area is not part of the commit message):

* Formatting: line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch. The indentation of the context
  lines, of the SQL inside the string literals, and of the statements that
  follow each query (shown here after the "with" block) is a best guess.
  Verify against the target files before applying.
* events.get_lexicon / properties.get_lexicon: each formats and executes one
  ClickHouse query filtered by project_id, ordered by display_name and paged
  with LIMIT page.limit OFFSET (page.page - 1) * page.limit. The total is read
  from the COUNT(1) OVER () column of the first row and that column is then
  removed from every row. The result is {"total": ..., "list": ...} with the
  rows passed through helper.list_to_camel_case.
* Every returned row gets "id" and "icon" (always None); event rows also get
  "possibleTypes" = ["String"]. Property rows take possible_types and
  sample_values from the two LEFT JOIN sub-queries.
* Routes added: GET /{projectId}/lexicon/events and
  GET /{projectId}/lexicon/properties, both returning {"data": <lexicon>}.
* NOTE(review): "id" is built from the row index within the returned page
  ("event_{i}" / "prop_{i}"), so ids restart at 0 on every page. Confirm the
  consumer does not need them to be unique across pages.
* NOTE(review): a page with no rows returns total 0, which is also what a
  page past the last row produces. Presumably acceptable; confirm.
* NOTE(review): presumably schemas.PaginatedSchema guarantees page >= 1 and
  limit >= 1; otherwise the computed offset is negative. Confirm.

 .../core/product_analytics/events.py          | 23 ++++++++++++
 .../core/product_analytics/properties.py      | 35 +++++++++++++++++++
 api/routers/subs/product_analytics.py         | 12 +++++++
 3 files changed, 70 insertions(+)

diff --git a/api/chalicelib/core/product_analytics/events.py b/api/chalicelib/core/product_analytics/events.py
index 37a4c5ebb..9dc157203 100644
--- a/api/chalicelib/core/product_analytics/events.py
+++ b/api/chalicelib/core/product_analytics/events.py
@@ -111,3 +111,26 @@ def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema):
     for r in rows:
         r.pop("total")
     return {"total": total, "rows": rows, "src": 2}
+
+
+def get_lexicon(project_id: int, page: schemas.PaginatedSchema):
+    with ClickHouseClient() as ch_client:
+        r = ch_client.format(
+            """SELECT COUNT(1) OVER () AS total,
+                      all_events.event_name AS name,
+                      *
+               FROM product_analytics.all_events
+               WHERE project_id=%(project_id)s
+               ORDER BY display_name
+               LIMIT %(limit)s OFFSET %(offset)s;""",
+            parameters={"project_id": project_id, "limit": page.limit, "offset": (page.page - 1) * page.limit})
+        rows = ch_client.execute(r)
+    if len(rows) == 0:
+        return {"total": 0, "list": []}
+    total = rows[0]["total"]
+    for i, row in enumerate(rows):
+        row["id"] = f"event_{i}"
+        row["icon"] = None
+        row["possibleTypes"] = ["String"]
+        row.pop("total")
+    return {"total": total, "list": helper.list_to_camel_case(rows)}
diff --git a/api/chalicelib/core/product_analytics/properties.py b/api/chalicelib/core/product_analytics/properties.py
index a62f243f2..e88f51de1 100644
--- a/api/chalicelib/core/product_analytics/properties.py
+++ b/api/chalicelib/core/product_analytics/properties.py
@@ -44,3 +44,38 @@ def get_event_properties(project_id: int, event_name):
         properties = ch_client.execute(r)
 
     return helper.list_to_camel_case(properties)
+
+
+def get_lexicon(project_id: int, page: schemas.PaginatedSchema):
+    with ClickHouseClient() as ch_client:
+        r = ch_client.format(
+            """SELECT COUNT(1) OVER () AS total,
+                      all_properties.property_name AS name,
+                      all_properties.*,
+                      possible_types.values AS possible_types,
+                      possible_values.values AS sample_values
+               FROM product_analytics.all_properties
+                    LEFT JOIN (SELECT project_id, property_name, array_agg(DISTINCT value_type) AS values
+                               FROM product_analytics.event_properties
+                               WHERE project_id=%(project_id)s
+                               GROUP BY 1, 2) AS possible_types
+                              USING (project_id, property_name)
+                    LEFT JOIN (SELECT project_id, property_name, array_agg(DISTINCT value) AS values
+                               FROM product_analytics.property_values_samples
+                               WHERE project_id=%(project_id)s
+                               GROUP BY 1, 2) AS possible_values USING (project_id, property_name)
+               WHERE project_id=%(project_id)s
+               ORDER BY display_name
+               LIMIT %(limit)s OFFSET %(offset)s;""",
+            parameters={"project_id": project_id,
+                        "limit": page.limit,
+                        "offset": (page.page - 1) * page.limit})
+        properties = ch_client.execute(r)
+    if len(properties) == 0:
+        return {"total": 0, "list": []}
+    total = properties[0]["total"]
+    for i, p in enumerate(properties):
+        p["id"] = f"prop_{i}"
+        p["icon"] = None
+        p.pop("total")
+    return {"total": total, "list": helper.list_to_camel_case(properties)}
diff --git a/api/routers/subs/product_analytics.py b/api/routers/subs/product_analytics.py
index aaebf788b..5b18ca93e 100644
--- a/api/routers/subs/product_analytics.py
+++ b/api/routers/subs/product_analytics.py
@@ -41,3 +41,15 @@ def get_event_properties(projectId: int, event_name: str = None,
 def search_events(projectId: int, data: schemas.EventsSearchPayloadSchema = Body(...),
                   context: schemas.CurrentContext = Depends(OR_context)):
     return {"data": events.search_events(project_id=projectId, data=data)}
+
+
+@app.get('/{projectId}/lexicon/events', tags=["product_analytics", "lexicon"])
+def get_all_lexicon_events(projectId: int, filter_query: Annotated[schemas.PaginatedSchema, Query()],
+                           context: schemas.CurrentContext = Depends(OR_context)):
+    return {"data": events.get_lexicon(project_id=projectId, page=filter_query)}
+
+
+@app.get('/{projectId}/lexicon/properties', tags=["product_analytics", "lexicon"])
+def get_all_lexicon_properties(projectId: int, filter_query: Annotated[schemas.PaginatedSchema, Query()],
+                               context: schemas.CurrentContext = Depends(OR_context)):
+    return {"data": properties.get_lexicon(project_id=projectId, page=filter_query)}