* fix(chalice): removed support for all type: webVitals/errors/performance/resources predefined card because UI is not showing them anymore

* refactor(chalice): changed all charts CH queries
This commit is contained in:
Kraiem Taha Yassine 2025-02-11 15:11:55 +01:00 committed by GitHub
parent ba55b359fb
commit 6579b6842b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 112 additions and 619 deletions

View file

@ -1,7 +1,6 @@
import schemas
from chalicelib.core import metadata
from chalicelib.core.errors.modules import sessions
from chalicelib.core.metrics import metrics
from chalicelib.utils import ch_client, exp_ch_helper
from chalicelib.utils import helper, metrics_helper
from chalicelib.utils.TimeUTC import TimeUTC
@ -385,10 +384,13 @@ def search(data: schemas.SearchErrorsSchema, project: schemas.ProjectContext, us
ON details.error_id=time_details.error_id
INNER JOIN (SELECT error_id, groupArray([timestamp, count]) AS chart
FROM (SELECT JSONExtractString(toString(`$properties`), 'error_id') AS error_id,
toUnixTimestamp(toStartOfInterval(created_at, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
gs.generate_series AS timestamp,
COUNT(DISTINCT session_id) AS count
FROM {MAIN_EVENTS_TABLE}
FROM generate_series(%(startDate)s, %(endDate)s, %(step_size)s) AS gs
LEFT JOIN {MAIN_EVENTS_TABLE} ON(TRUE)
WHERE {" AND ".join(ch_sub_query)}
AND created_at >= toDateTime(timestamp / 1000)
AND created_at < toDateTime((timestamp + %(step_size)s) / 1000)
GROUP BY error_id, timestamp
ORDER BY timestamp) AS sub_table
GROUP BY error_id) AS chart_details ON details.error_id=chart_details.error_id;"""
@ -405,9 +407,7 @@ def search(data: schemas.SearchErrorsSchema, project: schemas.ProjectContext, us
r["chart"] = list(r["chart"])
for i in range(len(r["chart"])):
r["chart"][i] = {"timestamp": r["chart"][i][0], "count": r["chart"][i][1]}
r["chart"] = metrics.__complete_missing_steps(rows=r["chart"], start_time=data.startTimestamp,
end_time=data.endTimestamp,
density=data.density, neutral={"count": 0})
return {
'total': total,
'errors': helper.list_to_camel_case(rows)

View file

@ -7,9 +7,7 @@ logger = logging.getLogger(__name__)
if config("EXP_METRICS", cast=bool, default=False):
logger.info(">>> Using experimental metrics")
from chalicelib.core.metrics import heatmaps_ch as heatmaps
from chalicelib.core.metrics import metrics_ch as metrics
from chalicelib.core.metrics import product_analytics_ch as product_analytics
else:
from chalicelib.core.metrics import heatmaps
from chalicelib.core.metrics import metrics
from chalicelib.core.metrics import product_analytics

View file

@ -6,7 +6,7 @@ from fastapi import HTTPException, status
import schemas
from chalicelib.core import issues
from chalicelib.core.errors import errors
from chalicelib.core.metrics import heatmaps, product_analytics, funnels, custom_metrics_predefined
from chalicelib.core.metrics import heatmaps, product_analytics, funnels
from chalicelib.core.sessions import sessions, sessions_search
from chalicelib.utils import helper, pg_client
from chalicelib.utils.TimeUTC import TimeUTC
@ -153,11 +153,6 @@ def __get_table_chart(project: schemas.ProjectContext, data: schemas.CardTable,
def get_chart(project: schemas.ProjectContext, data: schemas.CardSchema, user_id: int):
if data.is_predefined:
return custom_metrics_predefined.get_metric(key=data.metric_of,
project_id=project.project_id,
data=data.model_dump())
supported = {
schemas.MetricType.TIMESERIES: __get_timeseries_chart,
schemas.MetricType.TABLE: __get_table_chart,
@ -195,8 +190,6 @@ def get_sessions(project: schemas.ProjectContext, user_id, data: schemas.CardSes
def get_issues(project: schemas.ProjectContext, user_id: int, data: schemas.CardSchema):
if data.is_predefined:
return not_supported()
if data.metric_of == schemas.MetricOfTable.ISSUES:
return __get_table_of_issues(project=project, user_id=user_id, data=data)
supported = {
@ -598,11 +591,7 @@ def make_chart_from_card(project: schemas.ProjectContext, user_id, metric_id, da
raw_metric["density"] = data.density
metric: schemas.CardSchema = schemas.CardSchema(**raw_metric)
if metric.is_predefined:
return custom_metrics_predefined.get_metric(key=metric.metric_of,
project_id=project.project_id,
data=data.model_dump())
elif metric.metric_type == schemas.MetricType.HEAT_MAP:
if metric.metric_type == schemas.MetricType.HEAT_MAP:
if raw_metric["data"] and raw_metric["data"].get("sessionId"):
return heatmaps.get_selected_session(project_id=project.project_id,
session_id=raw_metric["data"]["sessionId"])

View file

@ -1,15 +0,0 @@
import logging
import schemas
from chalicelib.core.metrics import metrics
logger = logging.getLogger(__name__)
def get_metric(key: schemas.MetricOfWebVitals, project_id: int, data: dict):
supported = {
schemas.MetricOfWebVitals.AVG_VISITED_PAGES: metrics.get_user_activity_avg_visited_pages,
schemas.MetricOfWebVitals.COUNT_USERS: metrics.get_unique_users
}
return supported.get(key, lambda *args: None)(project_id=project_id, **data)

View file

@ -1,219 +0,0 @@
import logging
import schemas
from chalicelib.core import metadata
from chalicelib.utils import helper
from chalicelib.utils import pg_client
from chalicelib.utils.TimeUTC import TimeUTC
from chalicelib.utils.metrics_helper import get_step_size
logger = logging.getLogger(__name__)
def __get_constraints(project_id, time_constraint=True, chart=False, duration=True, project=True,
project_identifier="project_id",
main_table="sessions", time_column="start_ts", data={}):
pg_sub_query = []
main_table = main_table + "." if main_table is not None and len(main_table) > 0 else ""
if project:
pg_sub_query.append(f"{main_table}{project_identifier} =%({project_identifier})s")
if duration:
pg_sub_query.append(f"{main_table}duration>0")
if time_constraint:
pg_sub_query.append(f"{main_table}{time_column} >= %(startTimestamp)s")
pg_sub_query.append(f"{main_table}{time_column} < %(endTimestamp)s")
if chart:
pg_sub_query.append(f"{main_table}{time_column} >= generated_timestamp")
pg_sub_query.append(f"{main_table}{time_column} < generated_timestamp + %(step_size)s")
return pg_sub_query + __get_meta_constraint(project_id=project_id, data=data)
def __merge_charts(list1, list2, time_key="timestamp"):
if len(list1) != len(list2):
raise Exception("cannot merge unequal lists")
result = []
for i in range(len(list1)):
timestamp = min(list1[i][time_key], list2[i][time_key])
result.append({**list1[i], **list2[i], time_key: timestamp})
return result
def __get_constraint_values(data):
params = {}
for i, f in enumerate(data.get("filters", [])):
params[f"{f['key']}_{i}"] = f["value"]
return params
def __get_meta_constraint(project_id, data):
if len(data.get("filters", [])) == 0:
return []
constraints = []
meta_keys = metadata.get(project_id=project_id)
meta_keys = {m["key"]: m["index"] for m in meta_keys}
for i, f in enumerate(data.get("filters", [])):
if f["key"] in meta_keys.keys():
key = f"sessions.metadata_{meta_keys[f['key']]})"
if f["value"] in ["*", ""]:
constraints.append(f"{key} IS NOT NULL")
else:
constraints.append(f"{key} = %({f['key']}_{i})s")
else:
filter_type = f["key"].upper()
filter_type = [filter_type, "USER" + filter_type, filter_type[4:]]
if any(item in [schemas.FilterType.USER_BROWSER] \
for item in filter_type):
constraints.append(f"sessions.user_browser = %({f['key']}_{i})s")
elif any(item in [schemas.FilterType.USER_OS, schemas.FilterType.USER_OS_MOBILE] \
for item in filter_type):
constraints.append(f"sessions.user_os = %({f['key']}_{i})s")
elif any(item in [schemas.FilterType.USER_DEVICE, schemas.FilterType.USER_DEVICE_MOBILE] \
for item in filter_type):
constraints.append(f"sessions.user_device = %({f['key']}_{i})s")
elif any(item in [schemas.FilterType.USER_COUNTRY, schemas.FilterType.USER_COUNTRY_MOBILE] \
for item in filter_type):
constraints.append(f"sessions.user_country = %({f['key']}_{i})s")
elif any(item in [schemas.FilterType.USER_ID, schemas.FilterType.USER_ID_MOBILE] \
for item in filter_type):
constraints.append(f"sessions.user_id = %({f['key']}_{i})s")
elif any(item in [schemas.FilterType.USER_ANONYMOUS_ID, schemas.FilterType.USER_ANONYMOUS_ID_MOBILE] \
for item in filter_type):
constraints.append(f"sessions.user_anonymous_id = %({f['key']}_{i})s")
elif any(item in [schemas.FilterType.REV_ID, schemas.FilterType.REV_ID_MOBILE] \
for item in filter_type):
constraints.append(f"sessions.rev_id = %({f['key']}_{i})s")
return constraints
def __get_neutral(rows, add_All_if_empty=True):
neutral = {l: 0 for l in [i for k in [list(v.keys()) for v in rows] for i in k]}
if add_All_if_empty and len(neutral.keys()) <= 1:
neutral = {"All": 0}
return neutral
def __merge_rows_with_neutral(rows, neutral):
for i in range(len(rows)):
rows[i] = {**neutral, **rows[i]}
return rows
def __nested_array_to_dict_array(rows, key="url_host", value="count"):
for r in rows:
for i in range(len(r["keys"])):
r[r["keys"][i][key]] = r["keys"][i][value]
r.pop("keys")
return rows
def get_user_activity_avg_visited_pages(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), **args):
with pg_client.PostgresClient() as cur:
row = __get_user_activity_avg_visited_pages(cur, project_id, startTimestamp, endTimestamp, **args)
results = helper.dict_to_camel_case(row)
results["chart"] = __get_user_activity_avg_visited_pages_chart(cur, project_id, startTimestamp,
endTimestamp, **args)
diff = endTimestamp - startTimestamp
endTimestamp = startTimestamp
startTimestamp = endTimestamp - diff
row = __get_user_activity_avg_visited_pages(cur, project_id, startTimestamp, endTimestamp, **args)
previous = helper.dict_to_camel_case(row)
results["progress"] = helper.__progress(old_val=previous["value"], new_val=results["value"])
results["unit"] = schemas.TemplatePredefinedUnits.COUNT
return results
def __get_user_activity_avg_visited_pages(cur, project_id, startTimestamp, endTimestamp, **args):
pg_sub_query = __get_constraints(project_id=project_id, data=args)
pg_sub_query.append("sessions.pages_count>0")
pg_query = f"""SELECT COALESCE(CEIL(AVG(sessions.pages_count)),0) AS value
FROM public.sessions
WHERE {" AND ".join(pg_sub_query)};"""
params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
**__get_constraint_values(args)}
cur.execute(cur.mogrify(pg_query, params))
row = cur.fetchone()
return row
def __get_user_activity_avg_visited_pages_chart(cur, project_id, startTimestamp, endTimestamp, density=20, **args):
step_size = get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density, factor=1)
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp}
pg_sub_query_subset = __get_constraints(project_id=project_id, time_constraint=True,
chart=False, data=args)
pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=False, project=False,
chart=True, data=args, main_table="sessions", time_column="start_ts",
duration=False)
pg_sub_query_subset.append("sessions.duration IS NOT NULL")
pg_query = f"""WITH sessions AS(SELECT sessions.pages_count, sessions.start_ts
FROM public.sessions
WHERE {" AND ".join(pg_sub_query_subset)}
)
SELECT generated_timestamp AS timestamp,
COALESCE(AVG(sessions.pages_count),0) AS value
FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp
LEFT JOIN LATERAL (
SELECT sessions.pages_count
FROM sessions
WHERE {" AND ".join(pg_sub_query_chart)}
) AS sessions ON (TRUE)
GROUP BY generated_timestamp
ORDER BY generated_timestamp;"""
cur.execute(cur.mogrify(pg_query, {**params, **__get_constraint_values(args)}))
rows = cur.fetchall()
return rows
def get_unique_users(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(),
density=7, **args):
step_size = get_step_size(startTimestamp, endTimestamp, density, factor=1)
pg_sub_query = __get_constraints(project_id=project_id, data=args)
pg_sub_query_chart = __get_constraints(project_id=project_id, time_constraint=True,
chart=True, data=args)
pg_sub_query.append("user_id IS NOT NULL")
pg_sub_query.append("user_id != ''")
pg_sub_query_chart.append("user_id IS NOT NULL")
pg_sub_query_chart.append("user_id != ''")
with pg_client.PostgresClient() as cur:
pg_query = f"""SELECT generated_timestamp AS timestamp,
COALESCE(COUNT(sessions), 0) AS value
FROM generate_series(%(startTimestamp)s, %(endTimestamp)s, %(step_size)s) AS generated_timestamp
LEFT JOIN LATERAL ( SELECT DISTINCT user_id
FROM public.sessions
WHERE {" AND ".join(pg_sub_query_chart)}
) AS sessions ON (TRUE)
GROUP BY generated_timestamp
ORDER BY generated_timestamp;"""
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
cur.execute(cur.mogrify(pg_query, params))
rows = cur.fetchall()
results = {
"value": sum([r["value"] for r in rows]),
"chart": rows
}
diff = endTimestamp - startTimestamp
endTimestamp = startTimestamp
startTimestamp = endTimestamp - diff
pg_query = f"""SELECT COUNT(DISTINCT sessions.user_id) AS count
FROM public.sessions
WHERE {" AND ".join(pg_sub_query)};"""
params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
**__get_constraint_values(args)}
cur.execute(cur.mogrify(pg_query, params))
count = cur.fetchone()["count"]
results["progress"] = helper.__progress(old_val=count, new_val=results["value"])
results["unit"] = schemas.TemplatePredefinedUnits.COUNT
return results

View file

@ -1,278 +0,0 @@
import logging
from math import isnan
import schemas
from chalicelib.utils import ch_client
from chalicelib.utils import exp_ch_helper
from chalicelib.utils import helper
from chalicelib.utils.TimeUTC import TimeUTC
from chalicelib.utils.metrics_helper import get_step_size
logger = logging.getLogger(__name__)
def __get_basic_constraints(table_name=None, time_constraint=True, round_start=False, data={}, identifier="project_id"):
if table_name:
table_name += "."
else:
table_name = ""
ch_sub_query = [f"{table_name}{identifier} =toUInt16(%({identifier})s)"]
if time_constraint:
if round_start:
ch_sub_query.append(
f"toStartOfInterval({table_name}datetime, INTERVAL %(step_size)s second) >= toDateTime(%(startTimestamp)s/1000)")
else:
ch_sub_query.append(f"{table_name}datetime >= toDateTime(%(startTimestamp)s/1000)")
ch_sub_query.append(f"{table_name}datetime < toDateTime(%(endTimestamp)s/1000)")
return ch_sub_query + __get_generic_constraint(data=data, table_name=table_name)
def __get_basic_constraints_events(table_name=None, time_constraint=True, round_start=False, data={},
identifier="project_id"):
if table_name:
table_name += "."
else:
table_name = ""
ch_sub_query = [f"{table_name}{identifier} =toUInt16(%({identifier})s)"]
if time_constraint:
if round_start:
ch_sub_query.append(
f"toStartOfInterval({table_name}created_at, INTERVAL %(step_size)s second) >= toDateTime(%(startTimestamp)s/1000)")
else:
ch_sub_query.append(f"{table_name}created_at >= toDateTime(%(startTimestamp)s/1000)")
ch_sub_query.append(f"{table_name}created_at < toDateTime(%(endTimestamp)s/1000)")
return ch_sub_query + __get_generic_constraint(data=data, table_name=table_name)
def __frange(start, stop, step):
result = []
i = start
while i < stop:
result.append(i)
i += step
return result
def __add_missing_keys(original, complete):
for missing in [key for key in complete.keys() if key not in original.keys()]:
original[missing] = complete[missing]
return original
def __complete_missing_steps(start_time, end_time, density, neutral, rows, time_key="timestamp", time_coefficient=1000):
if len(rows) == density:
return rows
step = get_step_size(start_time, end_time, density, decimal=True)
optimal = [(int(i * time_coefficient), int((i + step) * time_coefficient)) for i in
__frange(start_time // time_coefficient, end_time // time_coefficient, step)]
result = []
r = 0
o = 0
for i in range(density):
neutral_clone = dict(neutral)
for k in neutral_clone.keys():
if callable(neutral_clone[k]):
neutral_clone[k] = neutral_clone[k]()
if r < len(rows) and len(result) + len(rows) - r == density:
result += rows[r:]
break
if r < len(rows) and o < len(optimal) and rows[r][time_key] < optimal[o][0]:
# complete missing keys in original object
rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone)
result.append(rows[r])
r += 1
elif r < len(rows) and o < len(optimal) and optimal[o][0] <= rows[r][time_key] < optimal[o][1]:
# complete missing keys in original object
rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone)
result.append(rows[r])
r += 1
o += 1
else:
neutral_clone[time_key] = optimal[o][0]
result.append(neutral_clone)
o += 1
return result
def __get_constraint(data, fields, table_name):
constraints = []
# for k in fields.keys():
for i, f in enumerate(data.get("filters", [])):
if f["key"] in fields.keys():
if f["value"] in ["*", ""]:
constraints.append(f"isNotNull({table_name}{fields[f['key']]})")
else:
constraints.append(f"{table_name}{fields[f['key']]} = %({f['key']}_{i})s")
# TODO: remove this in next release
offset = len(data.get("filters", []))
for i, f in enumerate(data.keys()):
if f in fields.keys():
if data[f] in ["*", ""]:
constraints.append(f"isNotNull({table_name}{fields[f]})")
else:
constraints.append(f"{table_name}{fields[f]} = %({f}_{i + offset})s")
return constraints
def __get_constraint_values(data):
params = {}
for i, f in enumerate(data.get("filters", [])):
params[f"{f['key']}_{i}"] = f["value"]
# TODO: remove this in next release
offset = len(data.get("filters", []))
for i, f in enumerate(data.keys()):
params[f"{f}_{i + offset}"] = data[f]
return params
METADATA_FIELDS = {"userId": "user_id",
"userAnonymousId": "user_anonymous_id",
"metadata1": "metadata_1",
"metadata2": "metadata_2",
"metadata3": "metadata_3",
"metadata4": "metadata_4",
"metadata5": "metadata_5",
"metadata6": "metadata_6",
"metadata7": "metadata_7",
"metadata8": "metadata_8",
"metadata9": "metadata_9",
"metadata10": "metadata_10"}
def __get_meta_constraint(data):
return __get_constraint(data=data, fields=METADATA_FIELDS, table_name="sessions_metadata.")
SESSIONS_META_FIELDS = {"revId": "rev_id",
"country": "user_country",
"os": "user_os",
"platform": "user_device_type",
"device": "user_device",
"browser": "user_browser"}
def __get_generic_constraint(data, table_name):
return __get_constraint(data=data, fields=SESSIONS_META_FIELDS, table_name=table_name)
def get_user_activity_avg_visited_pages(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), **args):
results = {}
with ch_client.ClickHouseClient() as ch:
rows = __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args)
if len(rows) > 0:
results = helper.dict_to_camel_case(rows[0])
for key in results:
if isnan(results[key]):
results[key] = 0
results["chart"] = __get_user_activity_avg_visited_pages_chart(ch, project_id, startTimestamp,
endTimestamp, **args)
diff = endTimestamp - startTimestamp
endTimestamp = startTimestamp
startTimestamp = endTimestamp - diff
rows = __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args)
if len(rows) > 0:
previous = helper.dict_to_camel_case(rows[0])
results["progress"] = helper.__progress(old_val=previous["value"], new_val=results["value"])
results["unit"] = schemas.TemplatePredefinedUnits.COUNT
return results
def __get_user_activity_avg_visited_pages(ch, project_id, startTimestamp, endTimestamp, **args):
ch_sub_query = __get_basic_constraints(table_name="pages", data=args)
ch_sub_query.append("pages.event_type='LOCATION'")
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
ch_query = f"""SELECT COALESCE(CEIL(avgOrNull(count)),0) AS value
FROM (SELECT COUNT(1) AS count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
WHERE {" AND ".join(ch_sub_query)}
GROUP BY session_id) AS groupped_data
WHERE count>0;"""
params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
**__get_constraint_values(args)}
rows = ch.execute(query=ch_query, parameters=params)
return rows
def __get_user_activity_avg_visited_pages_chart(ch, project_id, startTimestamp, endTimestamp, density=20, **args):
step_size = get_step_size(endTimestamp=endTimestamp, startTimestamp=startTimestamp, density=density)
ch_sub_query_chart = __get_basic_constraints(table_name="pages", round_start=True, data=args)
ch_sub_query_chart.append("pages.event_type='LOCATION'")
meta_condition = __get_meta_constraint(args)
ch_sub_query_chart += meta_condition
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
ch_query = f"""SELECT timestamp, COALESCE(avgOrNull(count), 0) AS value
FROM (SELECT toUnixTimestamp(toStartOfInterval(pages.datetime, INTERVAL %(step_size)s second ))*1000 AS timestamp,
session_id, COUNT(1) AS count
FROM {exp_ch_helper.get_main_events_table(startTimestamp)} AS pages
WHERE {" AND ".join(ch_sub_query_chart)}
GROUP BY timestamp,session_id
ORDER BY timestamp) AS groupped_data
WHERE count>0
GROUP BY timestamp
ORDER BY timestamp;"""
rows = ch.execute(query=ch_query, parameters=params)
rows = __complete_missing_steps(rows=rows, start_time=startTimestamp,
end_time=endTimestamp,
density=density, neutral={"value": 0})
return rows
def get_unique_users(project_id, startTimestamp=TimeUTC.now(delta_days=-1),
endTimestamp=TimeUTC.now(), density=7, **args):
step_size = get_step_size(startTimestamp, endTimestamp, density)
ch_sub_query = __get_basic_constraints(table_name="sessions", data=args)
ch_sub_query_chart = __get_basic_constraints(table_name="sessions", round_start=True, data=args)
meta_condition = __get_meta_constraint(args)
ch_sub_query += meta_condition
ch_sub_query_chart += meta_condition
ch_sub_query_chart.append("isNotNull(sessions.user_id)")
ch_sub_query_chart.append("sessions.user_id!=''")
with ch_client.ClickHouseClient() as ch:
ch_query = f"""\
SELECT toUnixTimestamp(toStartOfInterval(sessions.datetime, INTERVAL %(step_size)s second)) * 1000 AS timestamp,
COUNT(DISTINCT sessions.user_id) AS value
FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions
WHERE {" AND ".join(ch_sub_query_chart)}
GROUP BY timestamp
ORDER BY timestamp;\
"""
params = {"step_size": step_size, "project_id": project_id, "startTimestamp": startTimestamp,
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
rows = ch.execute(query=ch_query, parameters=params)
results = {
"value": sum([r["value"] for r in rows]),
"chart": __complete_missing_steps(rows=rows, start_time=startTimestamp, end_time=endTimestamp,
density=density,
neutral={"value": 0})
}
diff = endTimestamp - startTimestamp
endTimestamp = startTimestamp
startTimestamp = endTimestamp - diff
ch_query = f""" SELECT COUNT(DISTINCT user_id) AS count
FROM {exp_ch_helper.get_main_sessions_table(startTimestamp)} AS sessions
WHERE {" AND ".join(ch_sub_query)};"""
params = {"project_id": project_id, "startTimestamp": startTimestamp, "endTimestamp": endTimestamp,
**__get_constraint_values(args)}
count = ch.execute(query=ch_query, parameters=params)
count = count[0]["count"]
results["progress"] = helper.__progress(old_val=count, new_val=results["value"])
results["unit"] = schemas.TemplatePredefinedUnits.COUNT
return results

View file

@ -1,18 +1,14 @@
from typing import List
import schemas
from chalicelib.core.metrics.metrics_ch import __get_basic_constraints, __get_meta_constraint, \
__get_basic_constraints_events
from chalicelib.core.metrics.metrics_ch import __get_constraint_values, __complete_missing_steps
from chalicelib.utils import ch_client, exp_ch_helper
from chalicelib.utils import helper, dev
from chalicelib.utils.TimeUTC import TimeUTC
from chalicelib.utils import sql_helper as sh
from chalicelib.core import metadata
import logging
from time import time
import logging
import schemas
from chalicelib.core import metadata
from chalicelib.core.metrics.product_analytics import __transform_journey
from chalicelib.utils import ch_client, exp_ch_helper
from chalicelib.utils import helper
from chalicelib.utils import sql_helper as sh
from chalicelib.utils.TimeUTC import TimeUTC
from chalicelib.utils.metrics_helper import get_step_size
logger = logging.getLogger(__name__)
@ -24,6 +20,67 @@ JOURNEY_TYPES = {
}
def __get_basic_constraints_events(table_name=None, identifier="project_id"):
if table_name:
table_name += "."
else:
table_name = ""
ch_sub_query = [f"{table_name}{identifier} =toUInt16(%({identifier})s)"]
ch_sub_query.append(f"{table_name}created_at >= toDateTime(%(startTimestamp)s/1000)")
ch_sub_query.append(f"{table_name}created_at < toDateTime(%(endTimestamp)s/1000)")
return ch_sub_query
def __frange(start, stop, step):
result = []
i = start
while i < stop:
result.append(i)
i += step
return result
def __add_missing_keys(original, complete):
for missing in [key for key in complete.keys() if key not in original.keys()]:
original[missing] = complete[missing]
return original
def __complete_missing_steps(start_time, end_time, density, neutral, rows, time_key="timestamp", time_coefficient=1000):
if len(rows) == density:
return rows
step = get_step_size(start_time, end_time, density, decimal=True)
optimal = [(int(i * time_coefficient), int((i + step) * time_coefficient)) for i in
__frange(start_time // time_coefficient, end_time // time_coefficient, step)]
result = []
r = 0
o = 0
for i in range(density):
neutral_clone = dict(neutral)
for k in neutral_clone.keys():
if callable(neutral_clone[k]):
neutral_clone[k] = neutral_clone[k]()
if r < len(rows) and len(result) + len(rows) - r == density:
result += rows[r:]
break
if r < len(rows) and o < len(optimal) and rows[r][time_key] < optimal[o][0]:
# complete missing keys in original object
rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone)
result.append(rows[r])
r += 1
elif r < len(rows) and o < len(optimal) and optimal[o][0] <= rows[r][time_key] < optimal[o][1]:
# complete missing keys in original object
rows[r] = __add_missing_keys(original=rows[r], complete=neutral_clone)
result.append(rows[r])
r += 1
o += 1
else:
neutral_clone[time_key] = optimal[o][0]
result.append(neutral_clone)
o += 1
return result
# startPoints are computed before ranked_events to reduce the number of window functions over rows
# compute avg_time_from_previous at the same level as sessions_count (this was removed in v1.22)
# if start-point is selected, the selected event is ranked n°1
@ -50,8 +107,8 @@ def path_analysis(project_id: int, data: schemas.CardPathAnalysis):
sub_events.append({"column": JOURNEY_TYPES[s.type]["column"],
"eventType": JOURNEY_TYPES[s.type]["eventType"]})
step_1_post_conditions.append(
f"(`$event_name`='{JOURNEY_TYPES[s.type]["eventType"]}' AND event_number_in_session = 1 \
OR `$event_name`!='{JOURNEY_TYPES[s.type]["eventType"]}' AND event_number_in_session > 1)")
f"(`$event_name`='{JOURNEY_TYPES[s.type]['eventType']}' AND event_number_in_session = 1 \
OR `$event_name`!='{JOURNEY_TYPES[s.type]['eventType']}' AND event_number_in_session > 1)")
extra_metric_values.append(s.type)
if not q2_extra_col:
# This is used in case start event has different type of the visible event,
@ -453,7 +510,7 @@ WITH pre_ranked_events AS (SELECT *
FROM start_points INNER JOIN pre_ranked_events USING (session_id))
SELECT *
FROM ranked_events
{q2_extra_condition if q2_extra_condition else ""}"""
{q2_extra_condition if q2_extra_condition else ""};"""
logger.debug("---------Q2-----------")
ch.execute(query=ch_query2, parameters=params)
if time() - _now > 2:
@ -465,9 +522,9 @@ FROM ranked_events
sub_cte = ""
if data.hide_excess:
sub_cte = f""",
top_n AS ({"\nUNION ALL\n".join(top_query)}),
top_n_with_next AS ({"\nUNION ALL\n".join(top_with_next_query)}),
others_n AS ({"\nUNION ALL\n".join(other_query)})"""
top_n AS ({" UNION ALL ".join(top_query)}),
top_n_with_next AS ({" UNION ALL ".join(top_with_next_query)}),
others_n AS ({" UNION ALL ".join(other_query)})"""
projection_query = """\
-- Top to Top: valid
SELECT top_n_with_next.*
@ -549,13 +606,13 @@ FROM ranked_events
next_value,
sessions_count
FROM drop_n""")
projection_query = "\nUNION ALL\n".join(projection_query)
projection_query = " UNION ALL ".join(projection_query)
ch_query3 = f"""\
WITH ranked_events AS (SELECT *
FROM ranked_events_{time_key}),
{",\n".join(steps_query)},
drop_n AS ({"\nUNION ALL\n".join(drop_query)})
{", ".join(steps_query)},
drop_n AS ({" UNION ALL ".join(drop_query)})
{sub_cte}
SELECT event_number_in_session,
`$event_name` AS event_type,

View file

@ -1,12 +1,8 @@
import logging
from typing import List, Union
import logging
from typing import List, Union
import schemas
from chalicelib.core import events, metadata
from chalicelib.core.metrics import metrics
from chalicelib.core.sessions import performance_event, sessions_legacy
from chalicelib.utils import pg_client, helper, metrics_helper, ch_client, exp_ch_helper
from chalicelib.utils import sql_helper as sh
@ -17,8 +13,8 @@ logger = logging.getLogger(__name__)
def search2_series(data: schemas.SessionsSearchPayloadSchema, project_id: int, density: int,
metric_type: schemas.MetricType, metric_of: schemas.MetricOfTimeseries | schemas.MetricOfTable,
metric_value: List):
step_size = int(metrics_helper.get_step_size(endTimestamp=data.endTimestamp, startTimestamp=data.startTimestamp,
density=density))
step_size = metrics_helper.get_step_size(endTimestamp=data.endTimestamp, startTimestamp=data.startTimestamp,
density=density, factor=1)
extra_event = None
if metric_of == schemas.MetricOfTable.VISITED_URL:
extra_event = f"""SELECT DISTINCT ev.session_id, ev.url_path
@ -38,25 +34,27 @@ def search2_series(data: schemas.SessionsSearchPayloadSchema, project_id: int, d
with ch_client.ClickHouseClient() as cur:
if metric_type == schemas.MetricType.TIMESERIES:
if metric_of == schemas.MetricOfTimeseries.SESSION_COUNT:
query = f"""SELECT toUnixTimestamp(
toStartOfInterval(processed_sessions.datetime, INTERVAL %(step_size)s second)
) * 1000 AS timestamp,
COUNT(processed_sessions.session_id) AS count
FROM (SELECT s.session_id AS session_id,
s.datetime AS datetime
{query_part}) AS processed_sessions
query = f"""SELECT gs.generate_series AS timestamp,
COALESCE(COUNT(DISTINCT processed_sessions.session_id),0) AS count
FROM generate_series(%(startDate)s, %(endDate)s, %(step_size)s) AS gs
LEFT JOIN (SELECT s.session_id AS session_id,
s.datetime AS datetime
{query_part}) AS processed_sessions ON(TRUE)
WHERE processed_sessions.datetime >= toDateTime(timestamp / 1000)
AND processed_sessions.datetime < toDateTime((timestamp + %(step_size)s) / 1000)
GROUP BY timestamp
ORDER BY timestamp;"""
elif metric_of == schemas.MetricOfTimeseries.USER_COUNT:
query = f"""SELECT toUnixTimestamp(
toStartOfInterval(processed_sessions.datetime, INTERVAL %(step_size)s second)
) * 1000 AS timestamp,
COUNT(DISTINCT processed_sessions.user_id) AS count
FROM (SELECT s.user_id AS user_id,
s.datetime AS datetime
{query_part}
WHERE isNotNull(s.user_id)
AND s.user_id != '') AS processed_sessions
query = f"""SELECT gs.generate_series AS timestamp,
COALESCE(COUNT(DISTINCT processed_sessions.user_id),0) AS count
FROM generate_series(%(startDate)s, %(endDate)s, %(step_size)s) AS gs
LEFT JOIN (SELECT s.user_id AS user_id,
s.datetime AS datetime
{query_part}
WHERE isNotNull(s.user_id)
AND s.user_id != '') AS processed_sessions ON(TRUE)
WHERE processed_sessions.datetime >= toDateTime(timestamp / 1000)
AND processed_sessions.datetime < toDateTime((timestamp + %(step_size)s) / 1000)
GROUP BY timestamp
ORDER BY timestamp;"""
else:
@ -67,8 +65,6 @@ def search2_series(data: schemas.SessionsSearchPayloadSchema, project_id: int, d
logging.debug(main_query)
logging.debug("--------------------")
sessions = cur.execute(main_query)
sessions = metrics.__complete_missing_steps(start_time=data.startTimestamp, end_time=data.endTimestamp,
density=density, neutral={"count": 0}, rows=sessions)
elif metric_type == schemas.MetricType.TABLE:
full_args["limit_s"] = 0

View file

@ -4,4 +4,4 @@ def get_step_size(startTimestamp, endTimestamp, density, decimal=False, factor=1
return step_size
if decimal:
return step_size / density
return step_size // (density - 1)
return step_size // density

View file

@ -870,19 +870,12 @@ class MetricType(str, Enum):
TIMESERIES = "timeseries"
TABLE = "table"
FUNNEL = "funnel"
ERRORS = "errors"
PERFORMANCE = "performance"
RESOURCES = "resources"
WEB_VITAL = "webVitals"
PATH_ANALYSIS = "pathAnalysis"
RETENTION = "retention"
STICKINESS = "stickiness"
HEAT_MAP = "heatMap"
class MetricOfWebVitals(str, Enum):
AVG_VISITED_PAGES = "avgVisitedPages"
COUNT_USERS = "userCount"
class MetricOfTable(str, Enum):
@ -1035,12 +1028,6 @@ class __CardSchema(CardSessionsSchema):
# This is used to specify the number of top values for PathAnalysis
rows: int = Field(default=3, ge=1, le=10)
@computed_field
@property
def is_predefined(self) -> bool:
return self.metric_type in [MetricType.ERRORS, MetricType.PERFORMANCE,
MetricType.RESOURCES, MetricType.WEB_VITAL]
class CardTimeSeries(__CardSchema):
metric_type: Literal[MetricType.TIMESERIES]
@ -1110,23 +1097,6 @@ class CardFunnel(__CardSchema):
return self
class CardWebVital(__CardSchema):
metric_type: Literal[MetricType.WEB_VITAL]
metric_of: MetricOfWebVitals = Field(default=MetricOfWebVitals.AVG_VISITED_PAGES)
view_type: MetricOtherViewType = Field(...)
@model_validator(mode="before")
@classmethod
def __enforce_default(cls, values):
values["series"] = []
return values
@model_validator(mode="after")
def __transform(self):
self.metric_of = MetricOfWebVitals(self.metric_of)
return self
class CardHeatMap(__CardSchema):
metric_type: Literal[MetricType.HEAT_MAP]
metric_of: MetricOfHeatMap = Field(default=MetricOfHeatMap.HEAT_MAP_URL)
@ -1216,8 +1186,7 @@ class CardPathAnalysis(__CardSchema):
# Union of cards-schemas that doesn't change between FOSS and EE
__cards_union_base = Union[
CardTimeSeries, CardTable, CardFunnel,
CardWebVital, CardHeatMap, CardPathAnalysis]
CardTimeSeries, CardTable, CardFunnel, CardHeatMap, CardPathAnalysis]
CardSchema = ORUnion(__cards_union_base, discriminator='metric_type')

3
ee/api/.gitignore vendored
View file

@ -192,14 +192,11 @@ Pipfile.lock
/chalicelib/core/canvas.py
/chalicelib/core/collaborations/*
/chalicelib/core/countries.py
/chalicelib/core/metrics/metrics.py
/chalicelib/core/metrics/custom_metrics.py
/chalicelib/core/metrics/custom_metrics_predefined.py
/chalicelib/core/metrics/dashboards.py
/chalicelib/core/metrics/funnels.py
/chalicelib/core/metrics/heatmaps.py
/chalicelib/core/metrics/heatmaps_ch.py
/chalicelib/core/metrics/metrics_ch.py
/chalicelib/core/metrics/product_analytics.py
/chalicelib/core/metrics/product_analytics_ch.py
/chalicelib/core/metrics/product_anaytics2.py

View file

@ -12,14 +12,11 @@ rm -rf ./chalicelib/core/authorizers.py
rm -rf ./chalicelib/core/autocomplete/autocomplete.py
rm -rf ./chalicelib/core/collaborations
rm -rf ./chalicelib/core/countries.py
rm -rf ./chalicelib/core/metrics/metrics.py
rm -rf ./chalicelib/core/metrics/custom_metrics.py
rm -rf ./chalicelib/core/metrics/custom_metrics_predefined.py
rm -rf ./chalicelib/core/metrics/funnels.py
rm -rf ./chalicelib/core/metrics/dashboards.py
rm -rf ./chalicelib/core/metrics/heatmaps.py
rm -rf ./chalicelib/core/metrics/heatmaps_ch.py
rm -rf ./chalicelib/core/metrics/metrics_ch.py
rm -rf ./chalicelib/core/metrics/product_analytics.py
rm -rf ./chalicelib/core/metrics/product_analytics_ch.py
rm -rf ./chalicelib/core/metrics/product_anaytics2.py

View file

@ -38,7 +38,8 @@ FROM public.metrics
WHERE metric_of IN ('domainsErrors4xx', 'domainsErrors5xx', 'countSessions',
'countRequests', 'errorsPerDomains', 'errorsPerType',
'impactedSessionsByJsErrors', 'resourcesByParty', 'userOs',
'speedLocation');
'speedLocation', 'avgVisitedPages')
OR metric_type IN ('webVitals', 'errors', 'performance', 'resources');
COMMIT;

View file

@ -38,7 +38,8 @@ FROM public.metrics
WHERE metric_of IN ('domainsErrors4xx', 'domainsErrors5xx', 'countSessions',
'countRequests', 'errorsPerDomains', 'errorsPerType',
'impactedSessionsByJsErrors', 'resourcesByParty', 'userOs',
'speedLocation');
'speedLocation', 'avgVisitedPages')
OR metric_type IN ('webVitals', 'errors', 'performance', 'resources');
COMMIT;