refactor(chalice): return all events & properties

This commit is contained in:
Taha Yassine Kraiem 2025-05-06 19:03:10 +02:00 committed by Kraiem Taha Yassine
parent 3ac5c30c5f
commit 1576208e25
2 changed files with 116 additions and 59 deletions

View file

@ -7,30 +7,69 @@ from chalicelib.utils.ch_client import ClickHouseClient
from chalicelib.utils.exp_ch_helper import get_sub_condition
logger = logging.getLogger(__name__)

# Event names that get_events() always reports, even when the project has no
# matching row in product_analytics.all_events. Each name maps to a type name
# (all "String" here).
PREDEFINED_EVENTS = {
    "CLICK": "String",
    "INPUT": "String",
    "LOCATION": "String",
    "ERROR": "String",
    "PERFORMANCE": "String",
    "REQUEST": "String",
}
def get_events(project_id: int, page: schemas.PaginatedSchema):
    """Return one page of a project's events, padded with the predefined events.

    Fetches the requested page from ``product_analytics.all_events`` and then
    appends a placeholder entry for every key of ``PREDEFINED_EVENTS`` whose
    name is not among the fetched rows. Each appended placeholder is added to
    ``total`` as well.

    :param project_id: project whose events are listed.
    :param page: pagination settings; ``page.limit`` is the page size and the
        offset is ``(page.page - 1) * page.limit``.
    :return: ``{"total": <int>, "list": [<event dict>, ...]}`` where every
        event dict carries ``id``, ``dataType``, ``possibleTypes`` and
        ``_foundInPredefinedList`` in addition to the selected columns.
    """
    with ClickHouseClient() as ch_client:
        query = ch_client.format(
            """SELECT DISTINCT
                   ON(event_name,auto_captured)
                   COUNT (1) OVER () AS total,
                   event_name AS name, display_name, description,
                   auto_captured
               FROM product_analytics.all_events
               WHERE project_id=%(project_id)s
               ORDER BY auto_captured, display_name
               LIMIT %(limit)s
               OFFSET %(offset)s;""",
            parameters={"project_id": project_id,
                        "limit": page.limit,
                        "offset": (page.page - 1) * page.limit})
        rows = ch_client.execute(query)

    # The window COUNT repeats the same value on every row, so the first row
    # is enough; an empty result simply means no stored events.
    total = rows[0]["total"] if rows else 0
    events = helper.list_to_camel_case(rows) if rows else []
    for i, event in enumerate(events):
        event["id"] = f"event_{i}"
        event["dataType"] = "string"
        event["possibleTypes"] = ["string"]
        # NOTE(review): set to True for every stored row without checking
        # PREDEFINED_EVENTS, while the placeholders below get False - confirm
        # this is the meaning the consumer expects.
        event["_foundInPredefinedList"] = True
        event.pop("total")

    # NOTE(review): placeholders are appended on every page, independently of
    # page.page / page.limit - confirm this is intended.
    known_names = {event["name"] for event in events}
    for name in PREDEFINED_EVENTS:
        if name in known_names:
            continue
        events.append({
            "name": name,
            "displayName": "",
            "description": "",
            "autoCaptured": True,
            # Continue the positional numbering instead of the former
            # hard-coded "event_0", which duplicated the id of the first row
            # and of every other placeholder.
            "id": f"event_{len(events)}",
            "dataType": "string",
            "possibleTypes": ["string"],
            "_foundInPredefinedList": False
        })
        total += 1
    return {"total": total, "list": events}
def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema):
@ -119,21 +158,23 @@ def search_events(project_id: int, data: schemas.EventsSearchPayloadSchema):
def get_lexicon(project_id: int, page: schemas.PaginatedSchema):
    """Return one page of the events lexicon for a project.

    Selects every column of ``product_analytics.all_events`` for the project,
    ordered by ``display_name``, and tags each returned row with ``id``,
    ``dataType``, ``possibleTypes`` and ``_foundInPredefinedList``.

    :param project_id: project whose events are listed.
    :param page: pagination settings; ``page.limit`` is the page size and the
        offset is ``(page.page - 1) * page.limit``.
    :return: ``{"total": <int>, "list": [<event dict>, ...]}``; total is 0 and
        the list is empty when the query returns no rows.
    """
    query_params = {"project_id": project_id,
                    "limit": page.limit,
                    "offset": (page.page - 1) * page.limit}
    with ClickHouseClient() as ch_client:
        query = ch_client.format(
            """SELECT COUNT(1) OVER () AS total, all_events.event_name AS name,
                      *
               FROM product_analytics.all_events
               WHERE project_id = %(project_id)s
               ORDER BY display_name
               LIMIT %(limit)s
               OFFSET %(offset)s;""",
            parameters=query_params)
        result = ch_client.execute(query)

    if len(result) == 0:
        return {"total": 0, "list": []}

    # Every row carries the same window count; read it before it is dropped.
    total = result[0]["total"]
    entries = helper.list_to_camel_case(result)
    for index, entry in enumerate(entries):
        entry.update({
            "id": f"event_{index}",
            "dataType": "string",
            "possibleTypes": ["string"],
            "_foundInPredefinedList": True,
        })
        del entry["total"]
    return {"total": total, "list": entries}

View file

@ -2,7 +2,7 @@ import schemas
from chalicelib.utils import helper, exp_ch_helper
from chalicelib.utils.ch_client import ClickHouseClient
PREDEFINED_PROPERTY_TYPES = {
PREDEFINED_PROPERTIES = {
"label": "String",
"hesitation_time": "UInt32",
"name": "String",
@ -62,15 +62,16 @@ PREDEFINED_PROPERTY_TYPES = {
def get_all_properties(project_id: int, page: schemas.PaginatedSchema):
with ClickHouseClient() as ch_client:
r = ch_client.format(
"""SELECT COUNT(1) OVER () AS total,
property_name AS name, display_name,
"""SELECT COUNT(1) OVER () AS total, property_name AS name,
display_name,
array_agg(DISTINCT event_properties.value_type) AS possible_types
FROM product_analytics.all_properties
LEFT JOIN product_analytics.event_properties USING (project_id, property_name)
WHERE all_properties.project_id=%(project_id)s
GROUP BY property_name,display_name
WHERE all_properties.project_id = %(project_id)s
GROUP BY property_name, display_name
ORDER BY display_name
LIMIT %(limit)s OFFSET %(offset)s;""",
LIMIT %(limit)s
OFFSET %(offset)s;""",
parameters={"project_id": project_id,
"limit": page.limit,
"offset": (page.page - 1) * page.limit})
@ -82,11 +83,24 @@ def get_all_properties(project_id: int, page: schemas.PaginatedSchema):
for i, p in enumerate(properties):
p["id"] = f"prop_{i}"
p["_foundInPredefinedList"] = False
if p["name"] in PREDEFINED_PROPERTY_TYPES:
p["dataType"] = exp_ch_helper.simplify_clickhouse_type(PREDEFINED_PROPERTY_TYPES[p["name"]])
if p["name"] in PREDEFINED_PROPERTIES:
p["dataType"] = exp_ch_helper.simplify_clickhouse_type(PREDEFINED_PROPERTIES[p["name"]])
p["_foundInPredefinedList"] = True
p["possibleTypes"] = list(set(exp_ch_helper.simplify_clickhouse_types(p["possibleTypes"])))
p.pop("total")
keys = [p["name"] for p in properties]
for p in PREDEFINED_PROPERTIES:
if p not in keys:
total += 1
properties.append({
"name": p,
"displayName": "",
"possibleTypes": [
],
"id": f"prop_{len(properties) + 1}",
"_foundInPredefinedList": False,
"dataType": PREDEFINED_PROPERTIES[p]
})
return {"total": total, "list": properties}
@ -98,9 +112,9 @@ def get_event_properties(project_id: int, event_name):
array_agg(DISTINCT event_properties.value_type) AS possible_types
FROM product_analytics.event_properties
INNER JOIN product_analytics.all_properties USING (property_name)
WHERE event_properties.project_id=%(project_id)s
AND all_properties.project_id=%(project_id)s
AND event_properties.event_name=%(event_name)s
WHERE event_properties.project_id = %(project_id)s
AND all_properties.project_id = %(project_id)s
AND event_properties.event_name = %(event_name)s
GROUP BY ALL
ORDER BY 1;""",
parameters={"project_id": project_id, "event_name": event_name})
@ -109,8 +123,8 @@ def get_event_properties(project_id: int, event_name):
for i, p in enumerate(properties):
p["id"] = f"prop_{i}"
p["_foundInPredefinedList"] = False
if p["name"] in PREDEFINED_PROPERTY_TYPES:
p["dataType"] = exp_ch_helper.simplify_clickhouse_type(PREDEFINED_PROPERTY_TYPES[p["name"]])
if p["name"] in PREDEFINED_PROPERTIES:
p["dataType"] = exp_ch_helper.simplify_clickhouse_type(PREDEFINED_PROPERTIES[p["name"]])
p["_foundInPredefinedList"] = True
p["possibleTypes"] = list(set(exp_ch_helper.simplify_clickhouse_types(p["possibleTypes"])))
@ -120,24 +134,26 @@ def get_event_properties(project_id: int, event_name):
def get_lexicon(project_id: int, page: schemas.PaginatedSchema):
with ClickHouseClient() as ch_client:
r = ch_client.format(
"""SELECT COUNT(1) OVER () AS total,
all_properties.property_name AS name,
"""SELECT COUNT(1) OVER () AS total, all_properties.property_name AS name,
all_properties.*,
possible_types.values AS possible_types,
possible_values.values AS sample_values
FROM product_analytics.all_properties
LEFT JOIN (SELECT project_id, property_name, array_agg(DISTINCT value_type) AS values
LEFT JOIN (SELECT project_id, property_name, array_agg(DISTINCT value_type) AS
values
FROM product_analytics.event_properties
WHERE project_id=%(project_id)s
GROUP BY 1, 2) AS possible_types
USING (project_id, property_name)
LEFT JOIN (SELECT project_id, property_name, array_agg(DISTINCT value) AS values
LEFT JOIN (SELECT project_id, property_name, array_agg(DISTINCT value) AS
values
FROM product_analytics.property_values_samples
WHERE project_id=%(project_id)s
GROUP BY 1, 2) AS possible_values USING (project_id, property_name)
WHERE project_id=%(project_id)s
WHERE project_id = %(project_id)s
ORDER BY display_name
LIMIT %(limit)s OFFSET %(offset)s;""",
LIMIT %(limit)s
OFFSET %(offset)s;""",
parameters={"project_id": project_id,
"limit": page.limit,
"offset": (page.page - 1) * page.limit})