diff --git a/api/chalicelib/core/product_analytics/events.py b/api/chalicelib/core/product_analytics/events.py index 26f71834c..f902d91d5 100644 --- a/api/chalicelib/core/product_analytics/events.py +++ b/api/chalicelib/core/product_analytics/events.py @@ -7,30 +7,69 @@ from chalicelib.utils.ch_client import ClickHouseClient from chalicelib.utils.exp_ch_helper import get_sub_condition logger = logging.getLogger(__name__) +PREDEFINED_EVENTS = { + "CLICK": "String", + "INPUT": "String", + "LOCATION": "String", + "ERROR": "String", + "PERFORMANCE": "String", + "REQUEST": "String" +} def get_events(project_id: int, page: schemas.PaginatedSchema): with ClickHouseClient() as ch_client: r = ch_client.format( - """SELECT DISTINCT ON(event_name,auto_captured) - COUNT(1) OVER () AS total, - event_name AS name, display_name, description, - auto_captured - FROM product_analytics.all_events - WHERE project_id=%(project_id)s - ORDER BY auto_captured,display_name - LIMIT %(limit)s OFFSET %(offset)s;""", + """SELECT DISTINCT + ON(event_name,auto_captured) + COUNT (1) OVER () AS total, + event_name AS name, display_name, description, + auto_captured + FROM product_analytics.all_events + WHERE project_id=%(project_id)s + ORDER BY auto_captured, display_name + LIMIT %(limit)s + OFFSET %(offset)s;""", parameters={"project_id": project_id, "limit": page.limit, "offset": (page.page - 1) * page.limit}) rows = ch_client.execute(r) if len(rows) == 0: - return {"total": 0, "list": []} + return {"total": len(PREDEFINED_EVENTS), "list": [{ + "name": e, + "displayName": "", + "description": "", + "autoCaptured": True, + "id": "event_0", + "dataType": "string", + "possibleTypes": [ + "string" + ], + "_foundInPredefinedList": False + } for e in PREDEFINED_EVENTS]} total = rows[0]["total"] + rows = helper.list_to_camel_case(rows) for i, row in enumerate(rows): row["id"] = f"event_{i}" row["dataType"] = "string" row["possibleTypes"] = ["string"] + row["_foundInPredefinedList"] = True row.pop("total") - return {"total": total, "list": helper.list_to_camel_case(rows)} + keys = [r["name"] for r in rows] + for e in PREDEFINED_EVENTS: + if e not in keys: + total += 1 + rows.append({ + "name": e, + "displayName": "", + "description": "", + "autoCaptured": True, + "id": "event_0", + "dataType": "string", + "possibleTypes": [ + "string" + ], + "_foundInPredefinedList": False + }) + return {"total": total, "list": rows} def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema): @@ -119,21 +158,23 @@ def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema): def get_lexicon(project_id: int, page: schemas.PaginatedSchema): with ClickHouseClient() as ch_client: r = ch_client.format( - """SELECT COUNT(1) OVER () AS total, - all_events.event_name AS name, - * - FROM product_analytics.all_events - WHERE project_id=%(project_id)s - ORDER BY display_name - LIMIT %(limit)s OFFSET %(offset)s;""", + """SELECT COUNT(1) OVER () AS total, all_events.event_name AS name, + * + FROM product_analytics.all_events + WHERE project_id = %(project_id)s + ORDER BY display_name + LIMIT %(limit)s + OFFSET %(offset)s;""", parameters={"project_id": project_id, "limit": page.limit, "offset": (page.page - 1) * page.limit}) rows = ch_client.execute(r) if len(rows) == 0: return {"total": 0, "list": []} total = rows[0]["total"] + rows = helper.list_to_camel_case(rows) for i, row in enumerate(rows): row["id"] = f"event_{i}" row["dataType"] = "string" row["possibleTypes"] = ["string"] + row["_foundInPredefinedList"] = True row.pop("total") - return {"total": total, "list": helper.list_to_camel_case(rows)} + return {"total": total, "list": rows} diff --git a/api/chalicelib/core/product_analytics/properties.py b/api/chalicelib/core/product_analytics/properties.py index a34810d69..c88fe1c7d 100644 --- a/api/chalicelib/core/product_analytics/properties.py +++ b/api/chalicelib/core/product_analytics/properties.py @@ -2,7 +2,7 @@ import schemas from chalicelib.utils import helper, exp_ch_helper from chalicelib.utils.ch_client import ClickHouseClient -PREDEFINED_PROPERTY_TYPES = { +PREDEFINED_PROPERTIES = { "label": "String", "hesitation_time": "UInt32", "name": "String", @@ -62,15 +62,16 @@ PREDEFINED_PROPERTY_TYPES = { def get_all_properties(project_id: int, page: schemas.PaginatedSchema): with ClickHouseClient() as ch_client: r = ch_client.format( - """SELECT COUNT(1) OVER () AS total, - property_name AS name, display_name, - array_agg(DISTINCT event_properties.value_type) AS possible_types - FROM product_analytics.all_properties + """SELECT COUNT(1) OVER () AS total, property_name AS name, + display_name, + array_agg(DISTINCT event_properties.value_type) AS possible_types + FROM product_analytics.all_properties LEFT JOIN product_analytics.event_properties USING (project_id, property_name) - WHERE all_properties.project_id=%(project_id)s - GROUP BY property_name,display_name - ORDER BY display_name - LIMIT %(limit)s OFFSET %(offset)s;""", + WHERE all_properties.project_id = %(project_id)s + GROUP BY property_name, display_name + ORDER BY display_name + LIMIT %(limit)s + OFFSET %(offset)s;""", parameters={"project_id": project_id, "limit": page.limit, "offset": (page.page - 1) * page.limit}) @@ -82,35 +83,48 @@ def get_all_properties(project_id: int, page: schemas.PaginatedSchema): for i, p in enumerate(properties): p["id"] = f"prop_{i}" p["_foundInPredefinedList"] = False - if p["name"] in PREDEFINED_PROPERTY_TYPES: - p["dataType"] = exp_ch_helper.simplify_clickhouse_type(PREDEFINED_PROPERTY_TYPES[p["name"]]) + if p["name"] in PREDEFINED_PROPERTIES: + p["dataType"] = exp_ch_helper.simplify_clickhouse_type(PREDEFINED_PROPERTIES[p["name"]]) p["_foundInPredefinedList"] = True p["possibleTypes"] = list(set(exp_ch_helper.simplify_clickhouse_types(p["possibleTypes"]))) p.pop("total") + keys = [p["name"] for p in properties] + for p in PREDEFINED_PROPERTIES: + if p not in keys: + total += 1 + properties.append({ + "name": p, + "displayName": "", + "possibleTypes": [ + ], + "id": f"prop_{len(properties) + 1}", + "_foundInPredefinedList": False, + "dataType": PREDEFINED_PROPERTIES[p] + }) return {"total": total, "list": properties} def get_event_properties(project_id: int, event_name): with ClickHouseClient() as ch_client: r = ch_client.format( - """SELECT all_properties.property_name AS name, - all_properties.display_name, - array_agg(DISTINCT event_properties.value_type) AS possible_types - FROM product_analytics.event_properties - INNER JOIN product_analytics.all_properties USING (property_name) - WHERE event_properties.project_id=%(project_id)s - AND all_properties.project_id=%(project_id)s - AND event_properties.event_name=%(event_name)s - GROUP BY ALL - ORDER BY 1;""", + """SELECT all_properties.property_name AS name, + all_properties.display_name, + array_agg(DISTINCT event_properties.value_type) AS possible_types + FROM product_analytics.event_properties + INNER JOIN product_analytics.all_properties USING (property_name) + WHERE event_properties.project_id = %(project_id)s + AND all_properties.project_id = %(project_id)s + AND event_properties.event_name = %(event_name)s + GROUP BY ALL + ORDER BY 1;""", parameters={"project_id": project_id, "event_name": event_name}) properties = ch_client.execute(r) properties = helper.list_to_camel_case(properties) for i, p in enumerate(properties): p["id"] = f"prop_{i}" p["_foundInPredefinedList"] = False - if p["name"] in PREDEFINED_PROPERTY_TYPES: - p["dataType"] = exp_ch_helper.simplify_clickhouse_type(PREDEFINED_PROPERTY_TYPES[p["name"]]) + if p["name"] in PREDEFINED_PROPERTIES: + p["dataType"] = exp_ch_helper.simplify_clickhouse_type(PREDEFINED_PROPERTIES[p["name"]]) p["_foundInPredefinedList"] = True p["possibleTypes"] = list(set(exp_ch_helper.simplify_clickhouse_types(p["possibleTypes"]))) @@ -120,24 +134,26 @@ def get_event_properties(project_id: int, event_name): def get_lexicon(project_id: int, page: schemas.PaginatedSchema): with ClickHouseClient() as ch_client: r = ch_client.format( - """SELECT COUNT(1) OVER () AS total, - all_properties.property_name AS name, - all_properties.*, - possible_types.values AS possible_types, - possible_values.values AS sample_values - FROM product_analytics.all_properties - LEFT JOIN (SELECT project_id, property_name, array_agg(DISTINCT value_type) AS values - FROM product_analytics.event_properties - WHERE project_id=%(project_id)s - GROUP BY 1, 2) AS possible_types - USING (project_id, property_name) - LEFT JOIN (SELECT project_id, property_name, array_agg(DISTINCT value) AS values - FROM product_analytics.property_values_samples - WHERE project_id=%(project_id)s - GROUP BY 1, 2) AS possible_values USING (project_id, property_name) - WHERE project_id=%(project_id)s - ORDER BY display_name - LIMIT %(limit)s OFFSET %(offset)s;""", + """SELECT COUNT(1) OVER () AS total, all_properties.property_name AS name, + all_properties.*, + possible_types.values AS possible_types, + possible_values.values AS sample_values + FROM product_analytics.all_properties + LEFT JOIN (SELECT project_id, property_name, array_agg(DISTINCT value_type) AS + values + FROM product_analytics.event_properties + WHERE project_id=%(project_id)s + GROUP BY 1, 2) AS possible_types + USING (project_id, property_name) + LEFT JOIN (SELECT project_id, property_name, array_agg(DISTINCT value) AS + values + FROM product_analytics.property_values_samples + WHERE project_id=%(project_id)s + GROUP BY 1, 2) AS possible_values USING (project_id, property_name) + WHERE project_id = %(project_id)s + ORDER BY display_name + LIMIT %(limit)s + OFFSET %(offset)s;""", parameters={"project_id": project_id, "limit": page.limit, "offset": (page.page - 1) * page.limit})