feat(api): ee insights optimized with new structure
This commit is contained in:
parent
6cab959968
commit
161d40d078
1 changed file with 240 additions and 184 deletions
|
|
@ -44,6 +44,9 @@ def journey(project_id, startTimestamp=TimeUTC.now(delta_days=-1), endTimestamp=
|
|||
event_column = JOURNEY_TYPES[f["value"]]["column"]
|
||||
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
|
||||
meta_condition.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
meta_condition.append(f"sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append(f"sessions_metadata.datetime >= toDateTime(%(startTimestamp)s / 1000)")
|
||||
meta_condition.append(f"sessions_metadata.datetime < toDateTime(%(endTimestamp)s / 1000)")
|
||||
extra_values["user_id"] = f["value"]
|
||||
ch_sub_query = __get_basic_constraints(table_name=event_table, data=args)
|
||||
meta_condition += __get_meta_constraint(args)
|
||||
|
|
@ -192,62 +195,33 @@ def users_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endT
|
|||
**args):
|
||||
startTimestamp = TimeUTC.trunc_week(startTimestamp)
|
||||
endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK
|
||||
ch_sub_query = __get_basic_constraints(table_name='sessions', data=args)
|
||||
ch_sub_query = __get_basic_constraints(table_name='sessions_metadata', data=args)
|
||||
meta_condition = __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append("user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(user_id)")
|
||||
ch_sub_query.append("sessions.duration>0")
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s / 1000)")
|
||||
ch_sub_query.append("sessions_metadata.user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(sessions_metadata.user_id)")
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
# current optimization: from 6s to 4-5s
|
||||
ch_query = f"""SELECT toInt8((toStartOfWeek(datetime, 1) - toDate(1630886400000 / 1000)) / 7) AS week,
|
||||
COUNT(DISTINCT user_id) AS users_count
|
||||
FROM sessions_metadata INNER JOIN sessions USING (session_id)
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
AND sessions_metadata.datetime < toDateTime(%(endTimestamp)s / 1000)
|
||||
AND user_id IN (SELECT DISTINCT user_id
|
||||
FROM sessions_metadata
|
||||
INNER JOIN sessions USING (session_id)
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
AND toStartOfWeek(sessions.datetime,1) = toDate(%(startTimestamp)s / 1000)
|
||||
AND sessions_metadata.datetime < toDateTime(%(startTimestamp)s/1000 + 8 * 24 * 60 * 60 )
|
||||
AND sessions.datetime < toDateTime(%(startTimestamp)s/1000 + 8 * 24 * 60 * 60 )
|
||||
AND isNull((SELECT 1
|
||||
FROM sessions_metadata AS bmsess
|
||||
INNER JOIN sessions AS bsess USING (session_id)
|
||||
WHERE bsess.datetime < toDateTime(%(startTimestamp)s / 1000)
|
||||
AND bmsess.datetime < toDateTime(%(startTimestamp)s / 1000)
|
||||
AND bsess.project_id = %(project_id)s
|
||||
AND bmsess.user_id = sessions_metadata.user_id
|
||||
LIMIT 1))
|
||||
)
|
||||
GROUP BY week;"""
|
||||
# THIS IS THE ORIGINAL QUERY, PROBABLY WILL BE REUSED AGAIN WHEN CH-STRUCTURE CHANGES
|
||||
# ch_query = f"""SELECT toInt8((connexion_week - toDate(%(startTimestamp)s / 1000)) / 7) AS week,
|
||||
# COUNT(all_connexions.user_id) AS users_count,
|
||||
# groupArray(100)(all_connexions.user_id) AS connected_users
|
||||
# FROM (SELECT DISTINCT user_id
|
||||
# FROM sessions_metadata INNER JOIN sessions USING (session_id)
|
||||
# WHERE {" AND ".join(ch_sub_query)}
|
||||
# AND toStartOfWeek(sessions.datetime,1) = toDate(%(startTimestamp)s / 1000)
|
||||
# AND sessions_metadata.datetime < toDateTime(%(startTimestamp)s/1000 + 8 * 24 * 60 * 60 )
|
||||
# AND sessions.datetime < toDateTime(%(startTimestamp)s/1000 + 8 * 24 * 60 * 60 )
|
||||
# AND isNull((SELECT 1
|
||||
# FROM sessions_metadata AS bmsess INNER JOIN sessions AS bsess USING (session_id)
|
||||
# WHERE bsess.datetime < toDateTime(%(startTimestamp)s / 1000)
|
||||
# AND bmsess.datetime < toDateTime(%(startTimestamp)s / 1000)
|
||||
# AND bsess.project_id = %(project_id)s
|
||||
# AND bmsess.user_id = sessions_metadata.user_id
|
||||
# LIMIT 1))
|
||||
# ) AS users_list
|
||||
# INNER JOIN (SELECT DISTINCT user_id, toStartOfWeek(datetime,1) AS connexion_week
|
||||
# FROM sessions_metadata INNER JOIN sessions USING (session_id)
|
||||
# WHERE {" AND ".join(ch_sub_query)}
|
||||
# AND sessions_metadata.datetime < toDateTime(%(endTimestamp)s / 1000)
|
||||
# ) AS all_connexions USING (user_id)
|
||||
# GROUP BY connexion_week
|
||||
# ORDER BY connexion_week;"""
|
||||
ch_query = f"""SELECT toInt8((connexion_week - toDate(%(startTimestamp)s / 1000)) / 7) AS week,
|
||||
COUNT(all_connexions.user_id) AS users_count,
|
||||
groupArray(100)(all_connexions.user_id) AS connected_users
|
||||
FROM (SELECT DISTINCT user_id
|
||||
FROM sessions_metadata
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
AND toStartOfWeek(sessions_metadata.datetime,1) = toDate(%(startTimestamp)s / 1000)
|
||||
AND sessions_metadata.datetime < toDateTime(%(startTimestamp)s/1000 + 8 * 24 * 60 * 60 )
|
||||
AND isNull((SELECT 1
|
||||
FROM sessions_metadata AS bmsess
|
||||
WHERE bmsess.datetime < toDateTime(%(startTimestamp)s / 1000)
|
||||
AND bmsess.project_id = %(project_id)s
|
||||
AND bmsess.user_id = sessions_metadata.user_id
|
||||
LIMIT 1))
|
||||
) AS users_list
|
||||
INNER JOIN (SELECT DISTINCT user_id, toStartOfWeek(datetime,1) AS connexion_week
|
||||
FROM sessions_metadata
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
) AS all_connexions USING (user_id)
|
||||
GROUP BY connexion_week
|
||||
ORDER BY connexion_week;"""
|
||||
params = {"project_id": project_id, "startTimestamp": startTimestamp,
|
||||
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
|
||||
# print(ch_query % params)
|
||||
|
|
@ -264,15 +238,13 @@ def users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
|
|||
filters=[], **args):
|
||||
startTimestamp = TimeUTC.trunc_week(startTimestamp)
|
||||
endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK
|
||||
ch_sub_query = __get_basic_constraints(table_name='sessions', data=args)
|
||||
ch_sub_query = __get_basic_constraints(table_name='sessions_metadata', data=args)
|
||||
meta_condition = __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append("user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(user_id)")
|
||||
ch_sub_query.append("sessions.duration>0")
|
||||
ch_sub_query.append("sessions_metadata.user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(sessions_metadata.user_id)")
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s / 1000)")
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
# TODO: optimize after DB structure change, optimization from 6s to 4s
|
||||
ch_query = f"""SELECT toUnixTimestamp(toDateTime(first_connexion_week))*1000 AS first_connexion_week,
|
||||
week,
|
||||
users_count,
|
||||
|
|
@ -281,24 +253,21 @@ def users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
|
|||
SELECT first_connexion_week,
|
||||
toInt8((connexion_week - first_connexion_week) / 7) AS week,
|
||||
COUNT(DISTINCT all_connexions.user_id) AS users_count,
|
||||
groupArray(100)(all_connexions.user_id) AS connected_users
|
||||
FROM (SELECT user_id, MIN(toStartOfWeek(sessions.datetime, 1)) AS first_connexion_week
|
||||
FROM sessions_metadata INNER JOIN sessions USING (session_id)
|
||||
groupArray(20)(all_connexions.user_id) AS connected_users
|
||||
FROM (SELECT user_id, MIN(toStartOfWeek(sessions_metadata.datetime, 1)) AS first_connexion_week
|
||||
FROM sessions_metadata
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
AND sessions_metadata.datetime < toDateTime(%(startTimestamp)s/1000 + 8 * 24 * 60 * 60 )
|
||||
AND sessions.datetime < toDateTime(%(startTimestamp)s/1000 + 8 * 24 * 60 * 60 )
|
||||
AND isNull((SELECT 1
|
||||
FROM sessions_metadata AS bmsess INNER JOIN sessions AS bsess USING (session_id)
|
||||
WHERE bsess.datetime < toDateTime(%(startTimestamp)s / 1000)
|
||||
AND bmsess.datetime < toDateTime(%(startTimestamp)s / 1000)
|
||||
AND bsess.project_id = %(project_id)s
|
||||
FROM sessions_metadata AS bmsess
|
||||
WHERE bmsess.datetime < toDateTime(%(startTimestamp)s / 1000)
|
||||
AND bmsess.project_id = %(project_id)s
|
||||
AND bmsess.user_id = sessions_metadata.user_id
|
||||
LIMIT 1))
|
||||
GROUP BY user_id) AS users_list
|
||||
INNER JOIN (SELECT DISTINCT user_id, toStartOfWeek(datetime, 1) AS connexion_week
|
||||
FROM sessions_metadata INNER JOIN sessions USING (session_id)
|
||||
FROM sessions_metadata
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
AND sessions_metadata.datetime < toDateTime(%(endTimestamp)s / 1000)
|
||||
ORDER BY connexion_week, user_id
|
||||
) AS all_connexions USING (user_id)
|
||||
WHERE first_connexion_week <= connexion_week
|
||||
|
|
@ -308,7 +277,7 @@ def users_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
|
|||
|
||||
params = {"project_id": project_id, "startTimestamp": startTimestamp,
|
||||
"endTimestamp": endTimestamp, **__get_constraint_values(args)}
|
||||
print(ch_query % params)
|
||||
# print(ch_query % params)
|
||||
rows = ch.execute(ch_query, params)
|
||||
rows = __compute_weekly_percentage(helper.list_to_camel_case(rows))
|
||||
return {
|
||||
|
|
@ -324,10 +293,6 @@ def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
|
|||
endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK
|
||||
ch_sub_query = __get_basic_constraints(table_name='feature', data=args)
|
||||
meta_condition = __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append("user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(user_id)")
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s / 1000)")
|
||||
event_type = "PAGES"
|
||||
event_value = "/"
|
||||
extra_values = {}
|
||||
|
|
@ -339,7 +304,12 @@ def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
|
|||
event_value = f["value"]
|
||||
default = False
|
||||
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
|
||||
ch_sub_query.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
meta_condition.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
meta_condition.append("sessions_metadata.user_id IS NOT NULL")
|
||||
meta_condition.append("not empty(sessions_metadata.user_id)")
|
||||
meta_condition.append("sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
extra_values["user_id"] = f["value"]
|
||||
event_table = JOURNEY_TYPES[event_type]["table"]
|
||||
event_column = JOURNEY_TYPES[event_type]["column"]
|
||||
|
|
@ -348,9 +318,9 @@ def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
|
|||
if default:
|
||||
# get most used value
|
||||
ch_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
|
||||
FROM {event_table} AS feature INNER JOIN sessions_metadata USING (session_id)
|
||||
FROM {event_table} AS feature
|
||||
{"INNER JOIN sessions_metadata USING (session_id)" if len(meta_condition) > 0 else ""}
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
AND length({event_column}) > 2
|
||||
GROUP BY value
|
||||
ORDER BY count DESC
|
||||
LIMIT 1;"""
|
||||
|
|
@ -360,7 +330,22 @@ def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
|
|||
row = ch.execute(ch_query, params)
|
||||
if len(row) > 0:
|
||||
event_value = row[0]["value"]
|
||||
else:
|
||||
print(f"no {event_table} most used value")
|
||||
return {
|
||||
"startTimestamp": startTimestamp,
|
||||
"filters": [{"type": "EVENT_TYPE", "value": event_type},
|
||||
{"type": "EVENT_VALUE", "value": ""}],
|
||||
"chart": __complete_retention(rows=[], start_date=startTimestamp, end_date=TimeUTC.now())
|
||||
}
|
||||
extra_values["value"] = event_value
|
||||
if len(meta_condition) == 0:
|
||||
meta_condition.append("sessions_metadata.user_id IS NOT NULL")
|
||||
meta_condition.append("not empty(sessions_metadata.user_id)")
|
||||
meta_condition.append("sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append(f"feature.{event_column} = %(value)s")
|
||||
ch_query = f"""SELECT toInt8((connexion_week - toDate(%(startTimestamp)s / 1000)) / 7) AS week,
|
||||
COUNT(DISTINCT all_connexions.user_id) AS users_count,
|
||||
|
|
@ -376,6 +361,7 @@ def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
|
|||
WHERE bsess.datetime < toDateTime(%(startTimestamp)s / 1000)
|
||||
AND bmsess.datetime < toDateTime(%(startTimestamp)s / 1000)
|
||||
AND bsess.project_id = %(project_id)s
|
||||
AND bmsess.project_id = %(project_id)s
|
||||
AND bmsess.user_id = sessions_metadata.user_id
|
||||
AND bsess.{event_column}=%(value)s
|
||||
LIMIT 1))
|
||||
|
|
@ -383,7 +369,6 @@ def feature_retention(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
|
|||
INNER JOIN (SELECT DISTINCT user_id, toStartOfWeek(datetime,1) AS connexion_week
|
||||
FROM {event_table} AS feature INNER JOIN sessions_metadata USING (session_id)
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
AND sessions_metadata.datetime < toDateTime(%(endTimestamp)s / 1000)
|
||||
ORDER BY connexion_week, user_id
|
||||
) AS all_connexions USING (user_id)
|
||||
GROUP BY connexion_week
|
||||
|
|
@ -408,10 +393,6 @@ def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70),
|
|||
endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK
|
||||
ch_sub_query = __get_basic_constraints(table_name='feature', data=args)
|
||||
meta_condition = __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append("user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(user_id)")
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s / 1000)")
|
||||
|
||||
event_type = "PAGES"
|
||||
event_value = "/"
|
||||
|
|
@ -424,7 +405,13 @@ def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70),
|
|||
event_value = f["value"]
|
||||
default = False
|
||||
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
|
||||
ch_sub_query.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
meta_condition.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
meta_condition.append("sessions_metadata.user_id IS NOT NULL")
|
||||
meta_condition.append("not empty(sessions_metadata.user_id)")
|
||||
meta_condition.append("sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
|
||||
extra_values["user_id"] = f["value"]
|
||||
event_table = JOURNEY_TYPES[event_type]["table"]
|
||||
event_column = JOURNEY_TYPES[event_type]["column"]
|
||||
|
|
@ -432,9 +419,9 @@ def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70),
|
|||
if default:
|
||||
# get most used value
|
||||
ch_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
|
||||
FROM {event_table} AS feature INNER JOIN sessions_metadata USING (session_id)
|
||||
FROM {event_table} AS feature
|
||||
{"INNER JOIN sessions_metadata USING (session_id)" if len(meta_condition) > 0 else ""}
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
AND length({event_column}) > 2
|
||||
GROUP BY value
|
||||
ORDER BY count DESC
|
||||
LIMIT 1;"""
|
||||
|
|
@ -444,7 +431,24 @@ def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70),
|
|||
row = ch.execute(ch_query, params)
|
||||
if len(row) > 0:
|
||||
event_value = row[0]["value"]
|
||||
else:
|
||||
print(f"no {event_table} most used value")
|
||||
return {
|
||||
"startTimestamp": startTimestamp,
|
||||
"filters": [{"type": "EVENT_TYPE", "value": event_type},
|
||||
{"type": "EVENT_VALUE", "value": ""}],
|
||||
"chart": __complete_acquisition(rows=[], start_date=startTimestamp, end_date=TimeUTC.now())
|
||||
}
|
||||
extra_values["value"] = event_value
|
||||
|
||||
if len(meta_condition) == 0:
|
||||
meta_condition.append("sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append("sessions_metadata.user_id IS NOT NULL")
|
||||
meta_condition.append("not empty(sessions_metadata.user_id)")
|
||||
meta_condition.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append(f"feature.{event_column} = %(value)s")
|
||||
ch_query = f"""SELECT toUnixTimestamp(toDateTime(first_connexion_week))*1000 AS first_connexion_week,
|
||||
week,
|
||||
|
|
@ -461,17 +465,19 @@ def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70),
|
|||
AND sessions_metadata.datetime < toDateTime(%(startTimestamp)s/1000 + 8 * 24 * 60 * 60 )
|
||||
AND feature.datetime < toDateTime(%(startTimestamp)s/1000 + 8 * 24 * 60 * 60 )
|
||||
AND isNull((SELECT 1
|
||||
FROM sessions_metadata AS bmsess INNER JOIN sessions AS bsess USING (session_id)
|
||||
FROM sessions_metadata AS bmsess
|
||||
INNER JOIN {event_table} AS bsess USING (session_id)
|
||||
WHERE bsess.datetime < toDateTime(%(startTimestamp)s / 1000)
|
||||
AND bmsess.datetime < toDateTime(%(startTimestamp)s / 1000)
|
||||
AND bsess.project_id = %(project_id)s
|
||||
AND bmsess.project_id = %(project_id)s
|
||||
AND bmsess.user_id = sessions_metadata.user_id
|
||||
AND bsess.{event_column} = %(value)s
|
||||
LIMIT 1))
|
||||
GROUP BY user_id) AS users_list
|
||||
INNER JOIN (SELECT DISTINCT user_id, toStartOfWeek(datetime, 1) AS connexion_week
|
||||
FROM sessions_metadata INNER JOIN {event_table} AS feature USING (session_id)
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
AND sessions_metadata.datetime < toDateTime(%(endTimestamp)s / 1000)
|
||||
ORDER BY connexion_week, user_id
|
||||
) AS all_connexions USING (user_id)
|
||||
WHERE first_connexion_week <= connexion_week
|
||||
|
|
@ -481,7 +487,7 @@ def feature_acquisition(project_id, startTimestamp=TimeUTC.now(delta_days=-70),
|
|||
|
||||
params = {"project_id": project_id, "startTimestamp": startTimestamp,
|
||||
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
|
||||
# print(ch_query % params)
|
||||
print(ch_query % params)
|
||||
rows = ch.execute(ch_query, params)
|
||||
rows = __compute_weekly_percentage(helper.list_to_camel_case(rows))
|
||||
return {
|
||||
|
|
@ -498,9 +504,6 @@ def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_da
|
|||
endTimestamp = startTimestamp + 10 * TimeUTC.MS_WEEK
|
||||
ch_sub_query = __get_basic_constraints(table_name='feature', data=args)
|
||||
meta_condition = __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
ch_sub_query.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
|
||||
event_table = JOURNEY_TYPES["CLICK"]["table"]
|
||||
event_column = JOURNEY_TYPES["CLICK"]["column"]
|
||||
|
|
@ -510,47 +513,55 @@ def feature_popularity_frequency(project_id, startTimestamp=TimeUTC.now(delta_da
|
|||
event_table = JOURNEY_TYPES[f["value"]]["table"]
|
||||
event_column = JOURNEY_TYPES[f["value"]]["column"]
|
||||
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
|
||||
ch_sub_query.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
meta_condition.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
meta_condition.append("sessions_metadata.user_id IS NOT NULL")
|
||||
meta_condition.append("not empty(sessions_metadata.user_id)")
|
||||
meta_condition.append("sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
extra_values["user_id"] = f["value"]
|
||||
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
# TODO: change this query to not use join, optimization from 5s to 1s
|
||||
ch_query = f"""SELECT COUNT(DISTINCT user_id) AS count
|
||||
FROM sessions AS feature INNER JOIN sessions_metadata USING (session_id)
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
AND user_id IS NOT NULL
|
||||
AND not empty(user_id);"""
|
||||
if len(meta_condition) == 0:
|
||||
meta_condition.append("sessions_metadata.user_id IS NOT NULL")
|
||||
meta_condition.append("not empty(sessions_metadata.user_id)")
|
||||
meta_condition.append("sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
ch_sub_query += meta_condition
|
||||
ch_query = f"""SELECT COUNT(DISTINCT user_id) AS count
|
||||
FROM sessions_metadata
|
||||
WHERE {" AND ".join(meta_condition)};"""
|
||||
params = {"project_id": project_id, "startTimestamp": startTimestamp,
|
||||
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
|
||||
print(ch_query % params)
|
||||
print("---------------------")
|
||||
# print(ch_query % params)
|
||||
# print("---------------------")
|
||||
all_user_count = ch.execute(ch_query, params)
|
||||
if len(all_user_count) == 0 or all_user_count[0]["count"] == 0:
|
||||
return []
|
||||
all_user_count = all_user_count[0]["count"]
|
||||
ch_sub_query.append(f"length({event_column})>2")
|
||||
ch_query = f"""SELECT {event_column} AS value, COUNT(DISTINCT user_id) AS count
|
||||
FROM {event_table} AS feature INNER JOIN sessions_metadata USING (session_id)
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
AND user_id IS NOT NULL
|
||||
AND not empty(user_id)
|
||||
AND length({event_column})>2
|
||||
GROUP BY value
|
||||
ORDER BY count DESC
|
||||
LIMIT 7;"""
|
||||
|
||||
print(ch_query % params)
|
||||
print("---------------------")
|
||||
# print(ch_query % params)
|
||||
# print("---------------------")
|
||||
popularity = ch.execute(ch_query, params)
|
||||
params["values"] = [p["value"] for p in popularity]
|
||||
|
||||
if len(params["values"]) == 0:
|
||||
return []
|
||||
ch_query = f"""SELECT {event_column} AS value, COUNT(session_id) AS count
|
||||
FROM {event_table} AS feature INNER JOIN sessions_metadata USING (session_id)
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
AND {event_column} IN %(values)s
|
||||
GROUP BY value;"""
|
||||
|
||||
print(ch_query % params)
|
||||
print("---------------------")
|
||||
# print(ch_query % params)
|
||||
# print("---------------------")
|
||||
frequencies = ch.execute(ch_query, params)
|
||||
total_usage = sum([f["count"] for f in frequencies])
|
||||
frequencies = {f["value"]: f["count"] for f in frequencies}
|
||||
|
|
@ -577,6 +588,11 @@ def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), end
|
|||
default = False
|
||||
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
|
||||
meta_condition.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
meta_condition.append("sessions_metadata.user_id IS NOT NULL")
|
||||
meta_condition.append("not empty(sessions_metadata.user_id)")
|
||||
meta_condition.append("sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
extra_values["user_id"] = f["value"]
|
||||
event_table = JOURNEY_TYPES[event_type]["table"]
|
||||
event_column = JOURNEY_TYPES[event_type]["column"]
|
||||
|
|
@ -584,48 +600,58 @@ def feature_adoption(project_id, startTimestamp=TimeUTC.now(delta_days=-70), end
|
|||
ch_sub_query = __get_basic_constraints(table_name='feature', data=args)
|
||||
meta_condition += __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
ch_sub_query.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
ch_sub_query.append("user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(user_id)")
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
# TODO: optimize this when DB structure changes, optimization from 3s to 1s
|
||||
ch_query = f"""SELECT COUNT(DISTINCT user_id) AS count
|
||||
FROM sessions_metadata INNER JOIN sessions AS feature USING(session_id)
|
||||
WHERE {" AND ".join(ch_sub_query)};"""
|
||||
params = {"project_id": project_id, "startTimestamp": startTimestamp,
|
||||
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
|
||||
print(ch_query % params)
|
||||
print("---------------------")
|
||||
all_user_count = ch.execute(ch_query, params)
|
||||
if len(all_user_count) == 0 or all_user_count[0]["count"] == 0:
|
||||
return {"adoption": 0, "target": 0, "filters": [{"type": "EVENT_TYPE", "value": event_type},
|
||||
{"type": "EVENT_VALUE", "value": event_value}], }
|
||||
all_user_count = all_user_count[0]["count"]
|
||||
if default:
|
||||
# get most used value
|
||||
ch_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
|
||||
FROM {event_table} AS feature INNER JOIN sessions_metadata USING (session_id)
|
||||
FROM {event_table} AS feature
|
||||
{"INNER JOIN sessions_metadata USING (session_id)" if len(meta_condition) > 0 else ""}
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
GROUP BY value
|
||||
ORDER BY count DESC
|
||||
LIMIT 1;"""
|
||||
params = {"project_id": project_id, "startTimestamp": startTimestamp,
|
||||
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
|
||||
print(ch_query % params)
|
||||
print("---------------------")
|
||||
# print(ch_query % params)
|
||||
# print("---------------------")
|
||||
row = ch.execute(ch_query, params)
|
||||
if len(row) > 0:
|
||||
event_value = row[0]["value"]
|
||||
# else:
|
||||
# print(f"no {event_table} most used value")
|
||||
# return {"target": 0, "adoption": 0,
|
||||
# "filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": ""}]}
|
||||
|
||||
extra_values["value"] = event_value
|
||||
|
||||
if len(meta_condition) == 0:
|
||||
meta_condition.append("sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.user_id IS NOT NULL")
|
||||
meta_condition.append("not empty(sessions_metadata.user_id)")
|
||||
ch_sub_query += meta_condition
|
||||
ch_query = f"""SELECT COUNT(DISTINCT user_id) AS count
|
||||
FROM sessions_metadata
|
||||
WHERE {" AND ".join(meta_condition)};"""
|
||||
params = {"project_id": project_id, "startTimestamp": startTimestamp,
|
||||
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
|
||||
# print(ch_query % params)
|
||||
# print("---------------------")
|
||||
all_user_count = ch.execute(ch_query, params)
|
||||
if len(all_user_count) == 0 or all_user_count[0]["count"] == 0:
|
||||
return {"adoption": 0, "target": 0, "filters": [{"type": "EVENT_TYPE", "value": event_type},
|
||||
{"type": "EVENT_VALUE", "value": event_value}], }
|
||||
all_user_count = all_user_count[0]["count"]
|
||||
|
||||
ch_sub_query.append(f"feature.{event_column} = %(value)s")
|
||||
ch_query = f"""SELECT COUNT(DISTINCT user_id) AS count
|
||||
FROM {event_table} AS feature INNER JOIN sessions_metadata USING (session_id)
|
||||
WHERE {" AND ".join(ch_sub_query)};"""
|
||||
params = {"project_id": project_id, "startTimestamp": startTimestamp,
|
||||
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
|
||||
print(ch_query % params)
|
||||
print("---------------------")
|
||||
# print(ch_query % params)
|
||||
# print("---------------------")
|
||||
adoption = ch.execute(ch_query, params)
|
||||
adoption = adoption[0]["count"] / all_user_count
|
||||
return {"target": all_user_count, "adoption": adoption,
|
||||
|
|
@ -648,21 +674,24 @@ def feature_adoption_top_users(project_id, startTimestamp=TimeUTC.now(delta_days
|
|||
default = False
|
||||
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
|
||||
meta_condition.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
meta_condition.append("user_id IS NOT NULL")
|
||||
meta_condition.append("not empty(sessions_metadata.user_id)")
|
||||
meta_condition.append("sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
extra_values["user_id"] = f["value"]
|
||||
event_table = JOURNEY_TYPES[event_type]["table"]
|
||||
event_column = JOURNEY_TYPES[event_type]["column"]
|
||||
ch_sub_query = __get_basic_constraints(table_name='feature', data=args)
|
||||
meta_condition += __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
ch_sub_query.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
ch_sub_query.append("user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(user_id)")
|
||||
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
if default:
|
||||
# get most used value
|
||||
ch_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
|
||||
FROM {event_table} AS feature INNER JOIN sessions_metadata USING (session_id)
|
||||
FROM {event_table} AS feature
|
||||
{"INNER JOIN sessions_metadata USING (session_id)" if len(meta_condition) > 0 else ""}
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
GROUP BY value
|
||||
ORDER BY count DESC
|
||||
|
|
@ -672,9 +701,19 @@ def feature_adoption_top_users(project_id, startTimestamp=TimeUTC.now(delta_days
|
|||
row = ch.execute(ch_query, params)
|
||||
if len(row) > 0:
|
||||
event_value = row[0]["value"]
|
||||
else:
|
||||
print(f"no {event_table} most used value")
|
||||
return {"users": [],
|
||||
"filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": ""}]}
|
||||
|
||||
extra_values["value"] = event_value
|
||||
if len(meta_condition) == 0:
|
||||
ch_sub_query.append("user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(sessions_metadata.user_id)")
|
||||
ch_sub_query.append("sessions_metadata.project_id = %(project_id)s")
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
ch_sub_query.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
ch_sub_query.append(f"feature.{event_column} = %(value)s")
|
||||
# TODO: no possible optimization right now
|
||||
ch_query = f"""SELECT user_id, COUNT(DISTINCT session_id) AS count
|
||||
FROM {event_table} AS feature INNER JOIN sessions_metadata USING (session_id)
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
|
|
@ -683,7 +722,7 @@ def feature_adoption_top_users(project_id, startTimestamp=TimeUTC.now(delta_days
|
|||
LIMIT 10;"""
|
||||
params = {"project_id": project_id, "startTimestamp": startTimestamp,
|
||||
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
|
||||
print(ch_query % params)
|
||||
# print(ch_query % params)
|
||||
rows = ch.execute(ch_query, params)
|
||||
return {"users": helper.list_to_camel_case(rows),
|
||||
"filters": [{"type": "EVENT_TYPE", "value": event_type}, {"type": "EVENT_VALUE", "value": event_value}]}
|
||||
|
|
@ -705,8 +744,9 @@ def feature_adoption_daily_usage(project_id, startTimestamp=TimeUTC.now(delta_da
|
|||
default = False
|
||||
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
|
||||
meta_condition.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
meta_condition.append("sessions_metadata.datetime >= %(startTimestamp)s")
|
||||
meta_condition.append("sessions_metadata.datetime < %(endTimestamp)s")
|
||||
meta_condition.append("sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
|
||||
extra_values["user_id"] = f["value"]
|
||||
event_table = JOURNEY_TYPES[event_type]["table"]
|
||||
|
|
@ -726,13 +766,20 @@ def feature_adoption_daily_usage(project_id, startTimestamp=TimeUTC.now(delta_da
|
|||
LIMIT 1;"""
|
||||
params = {"project_id": project_id, "startTimestamp": startTimestamp,
|
||||
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
|
||||
# print(ch_query% params)
|
||||
# print(ch_query % params)
|
||||
row = ch.execute(ch_query, params)
|
||||
if len(row) > 0:
|
||||
event_value = row[0]["value"]
|
||||
else:
|
||||
print(f"no {event_table} most used value")
|
||||
return {
|
||||
"startTimestamp": startTimestamp,
|
||||
"filters": [{"type": "EVENT_TYPE", "value": event_type},
|
||||
{"type": "EVENT_VALUE", "value": ""}],
|
||||
"chart": __complete_acquisition(rows=[], start_date=startTimestamp, end_date=TimeUTC.now())
|
||||
}
|
||||
extra_values["value"] = event_value
|
||||
ch_sub_query.append(f"feature.{event_column} = %(value)s")
|
||||
# optimal
|
||||
ch_query = f"""SELECT toUnixTimestamp(day)*1000 AS timestamp, count
|
||||
FROM (SELECT toStartOfDay(feature.datetime) AS day, COUNT(DISTINCT session_id) AS count
|
||||
FROM {event_table} AS feature {"INNER JOIN sessions_metadata USING (session_id)" if len(meta_condition) > 0 else ""}
|
||||
|
|
@ -762,15 +809,17 @@ def feature_intensity(project_id, startTimestamp=TimeUTC.now(delta_days=-70), en
|
|||
event_column = JOURNEY_TYPES[f["value"]]["column"]
|
||||
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
|
||||
meta_condition.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
meta_condition.append("sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
extra_values["user_id"] = f["value"]
|
||||
ch_sub_query = __get_basic_constraints(table_name="feature", data=args)
|
||||
meta_condition += __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
ch_sub_query.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
ch_query = f"""SELECT {event_column} AS value, AVG(DISTINCT session_id) AS avg
|
||||
FROM {event_table} AS feature INNER JOIN sessions_metadata USING (session_id)
|
||||
FROM {event_table} AS feature
|
||||
{"INNER JOIN sessions_metadata USING (session_id)" if len(meta_condition) > 0 else ""}
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
GROUP BY value
|
||||
ORDER BY avg DESC
|
||||
|
|
@ -792,7 +841,7 @@ PERIOD_TO_FUNCTION = {
|
|||
@dev.timed
|
||||
def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[],
|
||||
**args):
|
||||
meta_condition = []
|
||||
meta_condition = __get_meta_constraint(args)
|
||||
period = "DAY"
|
||||
extra_values = {}
|
||||
for f in filters:
|
||||
|
|
@ -802,18 +851,14 @@ def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTime
|
|||
meta_condition.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
extra_values["user_id"] = f["value"]
|
||||
period_function = PERIOD_TO_FUNCTION[period]
|
||||
ch_sub_query = __get_basic_constraints(table_name="sessions", data=args)
|
||||
meta_condition += __get_meta_constraint(args)
|
||||
ch_sub_query = __get_basic_constraints(table_name="sessions_metadata", data=args)
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append("user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(user_id)")
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
ch_sub_query.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
ch_sub_query.append("sessions_metadata.user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(sessions_metadata.user_id)")
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
# TODO: optimize this when DB structure changes, optimization from 3s to 1s
|
||||
ch_query = f"""SELECT SUM(count) / intDiv(%(endTimestamp)s - %(startTimestamp)s, %(step_size)s) AS avg
|
||||
FROM (SELECT {period_function}(sessions_metadata.datetime) AS period, count(DISTINCT user_id) AS count
|
||||
FROM sessions_metadata INNER JOIN sessions USING (session_id)
|
||||
FROM sessions_metadata
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
GROUP BY period) AS daily_users;"""
|
||||
params = {"step_size": TimeUTC.MS_DAY if period == "DAY" else TimeUTC.MS_WEEK,
|
||||
|
|
@ -821,8 +866,8 @@ def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTime
|
|||
"startTimestamp": TimeUTC.trunc_day(startTimestamp) if period == "DAY" else TimeUTC.trunc_week(
|
||||
startTimestamp), "endTimestamp": endTimestamp, **__get_constraint_values(args),
|
||||
**extra_values}
|
||||
print(ch_query % params)
|
||||
print("---------------------")
|
||||
# print(ch_query % params)
|
||||
# print("---------------------")
|
||||
avg = ch.execute(ch_query, params)
|
||||
if len(avg) == 0 or avg[0]["avg"] == 0:
|
||||
return {"avg": 0, "chart": []}
|
||||
|
|
@ -830,53 +875,51 @@ def users_active(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTime
|
|||
# TODO: optimize this when DB structure changes, optimization from 3s to 1s
|
||||
ch_query = f"""SELECT toUnixTimestamp(toDateTime(period))*1000 AS timestamp, count
|
||||
FROM (SELECT {period_function}(sessions_metadata.datetime) AS period, count(DISTINCT user_id) AS count
|
||||
FROM sessions_metadata INNER JOIN sessions USING (session_id)
|
||||
FROM sessions_metadata
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
GROUP BY period
|
||||
ORDER BY period) AS raw_results;"""
|
||||
print(ch_query % params)
|
||||
print("---------------------")
|
||||
# print(ch_query % params)
|
||||
# print("---------------------")
|
||||
rows = ch.execute(ch_query, params)
|
||||
return {"avg": avg, "chart": rows}
|
||||
|
||||
|
||||
@dev.timed
|
||||
def users_power(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[], **args):
|
||||
ch_sub_query = __get_basic_constraints(table_name="sessions", data=args)
|
||||
ch_sub_query = __get_basic_constraints(table_name="sessions_metadata", data=args)
|
||||
meta_condition = __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append("user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(user_id)")
|
||||
ch_sub_query.append("sessions_metadata.user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(sessions_metadata.user_id)")
|
||||
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
# TODO: optimize this when DB structure changes, optimization from 4s to 1s
|
||||
ch_query = f"""SELECT AVG(count) AS avg
|
||||
ch_query = f"""SELECT ifNotFinite(AVG(count),0) AS avg
|
||||
FROM(SELECT COUNT(user_id) AS count
|
||||
FROM (SELECT user_id, COUNT(DISTINCT toStartOfDay(datetime)) AS number_of_days
|
||||
FROM sessions_metadata INNER JOIN sessions USING (session_id)
|
||||
FROM sessions_metadata
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
GROUP BY user_id) AS users_connexions
|
||||
GROUP BY number_of_days
|
||||
ORDER BY number_of_days) AS results;"""
|
||||
params = {"project_id": project_id,
|
||||
"startTimestamp": startTimestamp, "endTimestamp": endTimestamp, **__get_constraint_values(args)}
|
||||
print(ch_query % params)
|
||||
print("---------------------")
|
||||
# print(ch_query % params)
|
||||
# print("---------------------")
|
||||
avg = ch.execute(ch_query, params)
|
||||
if len(avg) == 0 or avg[0]["avg"] == 0:
|
||||
return {"avg": 0, "partition": []}
|
||||
avg = avg[0]["avg"]
|
||||
# TODO: optimize this when DB structure changes, optimization from 4s to 1s
|
||||
ch_query = f"""SELECT number_of_days, COUNT(user_id) AS count
|
||||
FROM (SELECT user_id, COUNT(DISTINCT toStartOfDay(datetime)) AS number_of_days
|
||||
FROM sessions_metadata INNER JOIN sessions USING (session_id)
|
||||
FROM sessions_metadata
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
GROUP BY user_id) AS users_connexions
|
||||
GROUP BY number_of_days
|
||||
ORDER BY number_of_days;"""
|
||||
|
||||
print(ch_query % params)
|
||||
print("---------------------")
|
||||
# print(ch_query % params)
|
||||
# print("---------------------")
|
||||
rows = ch.execute(ch_query, params)
|
||||
|
||||
return {"avg": avg, "partition": helper.list_to_camel_case(rows)}
|
||||
|
|
@ -885,6 +928,7 @@ def users_power(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimes
|
|||
@dev.timed
|
||||
def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTimestamp=TimeUTC.now(), filters=[],
|
||||
**args):
|
||||
ch_sub_query = __get_basic_constraints(table_name="feature", data=args)
|
||||
event_type = "PAGES"
|
||||
event_value = "/"
|
||||
extra_values = {}
|
||||
|
|
@ -898,22 +942,21 @@ def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTi
|
|||
default = False
|
||||
elif f["type"] in [sessions_metas.meta_type.USERID, sessions_metas.meta_type.USERID_IOS]:
|
||||
meta_condition.append(f"sessions_metadata.user_id = %(user_id)s")
|
||||
meta_condition.append("sessions_metadata.project_id = %(project_id)s")
|
||||
meta_condition.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
meta_condition.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
extra_values["user_id"] = f["value"]
|
||||
event_table = JOURNEY_TYPES[event_type]["table"]
|
||||
event_column = JOURNEY_TYPES[event_type]["column"]
|
||||
|
||||
ch_sub_query = __get_basic_constraints(table_name="feature", data=args)
|
||||
meta_condition += __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append("user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(user_id)")
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
ch_sub_query.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
if default:
|
||||
# get most used value
|
||||
ch_query = f"""SELECT {event_column} AS value, COUNT(*) AS count
|
||||
FROM {event_table} AS feature INNER JOIN sessions_metadata USING (session_id)
|
||||
FROM {event_table} AS feature
|
||||
{"INNER JOIN sessions_metadata USING (session_id)" if len(meta_condition) > 0 else ""}
|
||||
WHERE {" AND ".join(ch_sub_query)}
|
||||
GROUP BY value
|
||||
ORDER BY count DESC
|
||||
|
|
@ -924,9 +967,22 @@ def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTi
|
|||
row = ch.execute(ch_query, params)
|
||||
if len(row) > 0:
|
||||
event_value = row[0]["value"]
|
||||
else:
|
||||
print(f"no {event_table} most used value")
|
||||
return {
|
||||
"startTimestamp": startTimestamp,
|
||||
"filters": [{"type": "EVENT_TYPE", "value": event_type},
|
||||
{"type": "EVENT_VALUE", "value": ""}],
|
||||
"list": []
|
||||
}
|
||||
extra_values["value"] = event_value
|
||||
if len(meta_condition) == 0:
|
||||
ch_sub_query.append("sessions_metadata.user_id IS NOT NULL")
|
||||
ch_sub_query.append("not empty(sessions_metadata.user_id)")
|
||||
ch_sub_query.append("sessions_metadata.project_id = %(project_id)s")
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
ch_sub_query.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
ch_sub_query.append(f"feature.{event_column} = %(value)s")
|
||||
# TODO: no possible optimization right now
|
||||
ch_query = f"""SELECT user_id,
|
||||
toUnixTimestamp(last_time)*1000 AS last_time,
|
||||
interactions_count,
|
||||
|
|
@ -938,7 +994,9 @@ def users_slipping(project_id, startTimestamp=TimeUTC.now(delta_days=-70), endTi
|
|||
WHERE {" AND ".join(ch_sub_query)}
|
||||
GROUP BY user_id ) AS user_last_usage INNER JOIN sessions_metadata USING (user_id)
|
||||
WHERE now() - last_time > 7
|
||||
GROUP BY user_id, last_time, interactions_count) AS raw_results;"""
|
||||
GROUP BY user_id, last_time, interactions_count
|
||||
ORDER BY interactions_count DESC, last_time DESC
|
||||
LIMIT 50) AS raw_results;"""
|
||||
params = {"project_id": project_id, "startTimestamp": startTimestamp,
|
||||
"endTimestamp": endTimestamp, **__get_constraint_values(args), **extra_values}
|
||||
print(ch_query % params)
|
||||
|
|
@ -960,31 +1018,29 @@ def search(text, feature_type, project_id, platform=None):
|
|||
ch_sub_query = __get_basic_constraints(table_name="feature", data=args)
|
||||
meta_condition = __get_meta_constraint(args)
|
||||
ch_sub_query += meta_condition
|
||||
ch_sub_query.append("sessions_metadata.datetime >= toDateTime(%(startTimestamp)s/1000)")
|
||||
ch_sub_query.append("sessions_metadata.datetime < toDateTime(%(endTimestamp)s/1000)")
|
||||
params = {"startTimestamp": TimeUTC.now() - 1 * TimeUTC.MS_MONTH,
|
||||
"endTimestamp": TimeUTC.now(),
|
||||
"project_id": project_id,
|
||||
"value": helper.string_to_sql_like(text.lower()),
|
||||
"value": text.lower(),
|
||||
"platform_0": platform}
|
||||
if feature_type == "ALL":
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
sub_queries = []
|
||||
for e in JOURNEY_TYPES:
|
||||
sub_queries.append(f"""(SELECT DISTINCT {JOURNEY_TYPES[e]["column"]} AS value, '{e}' AS "type"
|
||||
FROM {JOURNEY_TYPES[e]["table"]} AS feature INNER JOIN sessions_metadata USING(session_id)
|
||||
FROM {JOURNEY_TYPES[e]["table"]} AS feature
|
||||
WHERE {" AND ".join(ch_sub_query)} AND positionUTF8({JOURNEY_TYPES[e]["column"]},%(value)s)!=0
|
||||
LIMIT 10)""")
|
||||
ch_query = "UNION ALL".join(sub_queries)
|
||||
# print(ch_query, params)
|
||||
print(ch_query % params)
|
||||
rows = ch.execute(ch_query, params)
|
||||
elif JOURNEY_TYPES.get(feature_type) is not None:
|
||||
with ch_client.ClickHouseClient() as ch:
|
||||
ch_query = f"""SELECT DISTINCT {JOURNEY_TYPES[feature_type]["column"]} AS value, '{feature_type}' AS "type"
|
||||
FROM {JOURNEY_TYPES[feature_type]["table"]} AS feature INNER JOIN sessions_metadata USING(session_id)
|
||||
FROM {JOURNEY_TYPES[feature_type]["table"]} AS feature
|
||||
WHERE {" AND ".join(ch_sub_query)} AND positionUTF8({JOURNEY_TYPES[feature_type]["column"]},%(value)s)!=0
|
||||
LIMIT 10;"""
|
||||
# print(ch_query, params)
|
||||
print(ch_query % params)
|
||||
rows = ch.execute(ch_query, params)
|
||||
else:
|
||||
return []
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue