openreplay/ee/recommendation/recommendation.py

109 lines
4 KiB
Python

from utils.ch_client import ClickHouseClient
from utils.pg_client import PostgresClient
def get_features_clickhouse(**kwargs):
if 'limit' in kwargs:
limit = kwargs['limit']
else:
limit = 500
query = f"""SELECT session_id, project_id, user_id, events_count, errors_count, duration, country, issue_score, device_type, rage, jsexception, badrequest FROM (
SELECT session_id, project_id, user_id, events_count, errors_count, duration, toInt8(user_country) as country, issue_score, toInt8(user_device_type) as device_type FROM experimental.sessions WHERE user_id IS NOT NULL) as T1
INNER JOIN (SELECT session_id, project_id, sum(issue_type = 'click_rage') as rage, sum(issue_type = 'js_exception') as jsexception, sum(issue_type = 'bad_request') as badrequest FROM experimental.events WHERE event_type = 'ISSUE' AND session_id > 0 GROUP BY session_id, project_id LIMIT {limit}) as T2
ON T1.session_id = T2.session_id AND T1.project_id = T2.project_id;"""
with ClickHouseClient() as conn:
res = conn.execute(query)
return res
def query_funnels(*kwargs):
# If public.funnel is empty
funnels_query = f"""SELECT project_id, user_id, filter FROM (SELECT project_id, user_id, metric_id FROM public.metrics WHERE metric_type='funnel'
) as T1 LEFT JOIN (SELECT filter, metric_id FROM public.metric_series) as T2 ON T1.metric_id = T2.metric_id"""
# Else
# funnels_query = "SELECT project_id, user_id, filter FROM public.funnels"
with PostgresClient() as conn:
conn.execute(funnels_query)
res = conn.fetchall()
return res
def query_metrics(*kwargs):
metrics_query = """SELECT metric_type, metric_of, metric_value, metric_format FROM public.metrics"""
with PostgresClient() as conn:
conn.execute(metrics_query)
res = conn.fetchall()
return res
def query_with_filters(*kwargs):
filters_query = """SELECT T1.metric_id as metric_id, project_id, name, metric_type, metric_of, filter FROM (
SELECT metric_id, project_id, name, metric_type, metric_of FROM metric_series WHERE filter != '{}') as T1 INNER JOIN
(SELECT metric_id, filter FROM metrics) as T2 ON T1.metric_id = T2.metric_id"""
with PostgresClient() as conn:
conn.execute(filters_query)
res = conn.fetchall()
return res
def transform_funnel(project_id, user_id, data):
res = list()
for k in range(len(data)):
_tmp = data[k]
if _tmp['project_id'] != project_id or _tmp['user_id'] != user_id:
continue
else:
_tmp = _tmp['filter']['events']
res.append(_tmp)
return res
def transform_with_filter(data, *kwargs):
res = list()
for k in range(len(data)):
_tmp = data[k]
jump = False
for _key in kwargs.keys():
if data[_key] != kwargs[_key]:
jump = True
break
if jump:
continue
_type = data['metric_type']
if _type == 'funnel':
res.append(['funnel', _tmp['filter']['events']])
elif _type == 'timeseries':
res.append(['timeseries', _tmp['filter']['filters'], _tmp['filter']['events']])
elif _type == 'table':
res.append(['table', _tmp['metric_of'], _tmp['filter']['events']])
return res
def transform_data():
pass
def transform(element):
key_ = element.pop('user_id')
secondary_key_ = element.pop('session_id')
context_ = element.pop('project_id')
features_ = element
del element
return {(key_, context_): {secondary_key_: list(features_.values())}}
def get_by_project(data, project_id):
head_ = [list(d.keys())[0][1] for d in data]
index_ = [k for k in range(len(head_)) if head_[k] == project_id]
return [data[k] for k in index_]
def get_by_user(data, user_id):
head_ = [list(d.keys())[0][0] for d in data]
index_ = [k for k in range(len(head_)) if head_[k] == user_id]
return [data[k] for k in index_]
if __name__ == '__main__':
data = get_features_clickhouse()
print('Data length:', len(data))