From 910173d7ab2755a685b1ffa755051ac8da8c2a1a Mon Sep 17 00:00:00 2001 From: MauricioGarciaS <47052044+MauricioGarciaS@users.noreply.github.com> Date: Wed, 11 Jan 2023 17:02:10 +0100 Subject: [PATCH 1/9] Changed output format of sessions insights --- ee/api/chalicelib/core/sessions_insights.py | 177 +++++++++++--------- 1 file changed, 102 insertions(+), 75 deletions(-) diff --git a/ee/api/chalicelib/core/sessions_insights.py b/ee/api/chalicelib/core/sessions_insights.py index 12ec73215..31310e4d1 100644 --- a/ee/api/chalicelib/core/sessions_insights.py +++ b/ee/api/chalicelib/core/sessions_insights.py @@ -122,9 +122,14 @@ def query_requests_by_period(project_id, start_time, end_time, conn=None): res = conn.execute(query=query) else: query = conn.format(query=query, params=params) + print("--------------------") + print(query) + print("--------------------") res = conn.execute(query=query) table_hh1, table_hh2, columns, this_period_hosts, last_period_hosts = __get_two_values(res, time_index='hh', - name_index='source') + name_index='source') + test = [k[4] for k in table_hh1] + print(f'length {len(test)}, uniques {len(set(test))}') del res new_hosts = [x for x in this_period_hosts if x not in last_period_hosts] @@ -137,20 +142,35 @@ def query_requests_by_period(project_id, start_time, end_time, conn=None): delta_success = dict() for n in common_names: d1_tmp = _table_where(table_hh1, source_idx, n) - # d1_tmp = table_hh1[table_hh1[:, source_idx] == n] d2_tmp = _table_where(table_hh2, source_idx, n) - # d2_tmp = table_hh2[table_hh2[:, source_idx] == n] delta_duration[n] = _mean_table_index(d1_tmp, duration_idx) - _mean_table_index(d2_tmp, duration_idx) - # delta_duration[n] = d1_tmp[:, duration_idx].mean() - d2_tmp[:, duration_idx].mean() delta_success[n] = _mean_table_index(d1_tmp, success_idx) - _mean_table_index(d2_tmp, success_idx) - # delta_success[n] = d1_tmp[:, success_idx].mean() - d2_tmp[:, success_idx].mean() - names_idx = columns.index('names') - d1_tmp = 
_sort_table_index(table_hh1, success_idx) - # d1_tmp = d1_tmp[d1_tmp[:, success_idx].argsort()] - return {'ratio': list(zip(_table_slice(d1_tmp, source_idx), _table_slice(d1_tmp, success_idx))), - 'increase': sorted(delta_success.items(), key=lambda k: k[1], reverse=False), - 'newEvents': new_hosts} + #names_idx = columns.index('names') + total = _sum_table_index(table_hh1, duration_idx) + d1_tmp = _sort_table_index(table_hh1, duration_idx, reverse=True) + _tmp = _table_slice(d1_tmp, duration_idx) + _tmp2 = _table_slice(d1_tmp, source_idx) + + increase = sorted(delta_duration.items(), key=lambda k: k[1], reverse=True) + ratio = sorted(zip(_tmp2, _tmp), key=lambda k: k[1], reverse=True) + names_ = set([k[0] for k in increase[:3]+ratio[:3]]+new_hosts[:3]) + + results = list() + for n in names_: + data_ = {'category': 'network', 'name': n, 'value': None, 'ratio': None, 'increase': None, 'isNew': True} + for n_, v in ratio: + if n == n_: + data_['value'] = v + data_['ratio'] = v/total + break + for n_, v in increase: + if n == n_: + data_['increase'] = v + data_['isNew'] = False + break + results.append(data_) + return results def query_most_errors_by_period(project_id, start_time, end_time, conn=None): @@ -166,13 +186,13 @@ def query_most_errors_by_period(project_id, start_time, end_time, conn=None): FROM (SELECT arrayJoin(arrayMap(x -> toDateTime(x), range(start, end, %(step_size)s))) as hh) AS T1 LEFT JOIN (SELECT session_id, name, source, message, toStartOfInterval(datetime, INTERVAL %(step_size)s second) as dtime FROM experimental.events - WHERE project_id = {project_id} + WHERE project_id = {project_id} + AND datetime >= toDateTime(%(startTimestamp)s/1000) + AND datetime < toDateTime(%(endTimestamp)s/1000) AND {" AND ".join(conditions)}) AS T2 ON T2.dtime = T1.hh GROUP BY T1.hh, T2.name ORDER BY T1.hh DESC;""" - # print("----------------------------------") - # print(query) - # print("----------------------------------") + if conn is None: with 
ch_client.ClickHouseClient() as conn: query = conn.format(query=query, params=params) @@ -183,6 +203,7 @@ def query_most_errors_by_period(project_id, start_time, end_time, conn=None): table_hh1, table_hh2, columns, this_period_errors, last_period_errors = __get_two_values(res, time_index='hh', name_index='names') + print(f'res {res}') del res new_errors = [x for x in this_period_errors if x not in last_period_errors] @@ -192,18 +213,31 @@ def query_most_errors_by_period(project_id, start_time, end_time, conn=None): names_idx = columns.index('names') percentage_errors = dict() total = _sum_table_index(table_hh1, sessions_idx) - # total = table_hh1[:, sessions_idx].sum() error_increase = dict() for n in this_period_errors: - percentage_errors[n] = _sum_table_index(_table_where(table_hh1, names_idx, n), sessions_idx) / total - # percentage_errors[n] = (table_hh1[table_hh1[:, names_idx] == n][:, sessions_idx].sum())/total + percentage_errors[n] = _sum_table_index(_table_where(table_hh1, names_idx, n), sessions_idx) for n in common_errors: error_increase[n] = _sum_table_index(_table_where(table_hh1, names_idx, n), names_idx) - _sum_table_index( _table_where(table_hh2, names_idx, n), names_idx) - # error_increase[n] = table_hh1[table_hh1[:, names_idx] == n][:, names_idx].sum() - table_hh2[table_hh2[:, names_idx] == n][:, names_idx].sum() - return {'ratio': sorted(percentage_errors.items(), key=lambda k: k[1], reverse=True), - 'increase': sorted(error_increase.items(), key=lambda k: k[1], reverse=True), - 'newEvents': new_errors} + ratio = sorted(percentage_errors.items(), key=lambda k: k[1], reverse=True) + increase = sorted(error_increase.items(), key=lambda k: k[1], reverse=True) + names_ = set([k[0] for k in increase[:3] + ratio[:3]] + new_errors[:3]) + + results = list() + for n in names_: + data_ = {'category': 'errors', 'name': n, 'value': None, 'ratio': None, 'increase': None, 'isNew': True} + for n_, v in ratio: + if n == n_: + data_['value'] = v + data_['ratio'] 
= v/total + break + for n_, v in increase: + if n == n_: + data_['increase'] = v + data_['isNew'] = False + break + results.append(data_) + return results def query_cpu_memory_by_period(project_id, start_time, end_time, conn=None): @@ -238,11 +272,11 @@ def query_cpu_memory_by_period(project_id, start_time, end_time, conn=None): cpu_idx = columns.index('cpu_used') _tmp = _mean_table_index(table_hh2, memory_idx) - # _tmp = table_hh2[:, memory_idx].mean() # TODO: what if _tmp=0 ? _tmp = 1 if _tmp == 0 else _tmp - return {'cpuIncrease': _mean_table_index(table_hh1, cpu_idx) - _mean_table_index(table_hh2, cpu_idx), - 'memoryIncrease': (_mean_table_index(table_hh1, memory_idx) - _tmp) / _tmp} + return [{'category': 'resources', + 'cpuIncrease': _mean_table_index(table_hh1, cpu_idx) - _mean_table_index(table_hh2, cpu_idx), + 'memoryIncrease': (_mean_table_index(table_hh1, memory_idx) - _tmp) / _tmp}] def query_click_rage_by_period(project_id, start_time, end_time, conn=None): @@ -253,7 +287,7 @@ def query_click_rage_by_period(project_id, start_time, end_time, conn=None): conditions = ["issue_type = 'click_rage'", "event_type = 'ISSUE'"] query = f"""WITH toUInt32(toStartOfInterval(toDateTime(%(startTimestamp)s/1000), INTERVAL %(step_size)s second)) AS start, toUInt32(toStartOfInterval(toDateTime(%(endTimestamp)s/1000), INTERVAL %(step_size)s second)) AS end - SELECT T1.hh, count(T2.session_id) as sessions, T2.url_host as names, groupUniqArray(T2.url_path) as sources + SELECT T1.hh, count(T2.session_id) as sessions, groupUniqArray(T2.url_host) as names, T2.url_path as sources FROM (SELECT arrayJoin(arrayMap(x -> toDateTime(x), range(start, end, %(step_size)s))) as hh) AS T1 LEFT JOIN (SELECT session_id, url_host, url_path, toStartOfInterval(datetime, INTERVAL %(step_size)s second ) as dtime FROM experimental.events @@ -261,7 +295,7 @@ def query_click_rage_by_period(project_id, start_time, end_time, conn=None): AND datetime >= toDateTime(%(startTimestamp)s/1000) AND 
datetime < toDateTime(%(endTimestamp)s/1000) AND {" AND ".join(conditions)}) AS T2 ON T2.dtime = T1.hh - GROUP BY T1.hh, T2.url_host + GROUP BY T1.hh, T2.url_path ORDER BY T1.hh DESC;""" if conn is None: with ch_client.ClickHouseClient() as conn: @@ -278,14 +312,16 @@ def query_click_rage_by_period(project_id, start_time, end_time, conn=None): res = conn.execute(query=query) table_hh1, table_hh2, columns, this_period_rage, last_period_rage = __get_two_values(res, time_index='hh', - name_index='names') + name_index='sources') del res new_names = [x for x in this_period_rage if x not in last_period_rage] common_names = [x for x in this_period_rage if x not in new_names] + print(f'[res...] {new_names}\n') + print(f'[common...] {common_names}\n') sessions_idx = columns.index('sessions') - names_idx = columns.index('names') + names_idx = columns.index('sources') raged_increment = dict() # TODO verify line (188) _tmp = table_hh2[:, sessions_idx][n].sum() @@ -293,61 +329,52 @@ def query_click_rage_by_period(project_id, start_time, end_time, conn=None): if n is None: continue _tmp = _sum_table_index(_table_where(table_hh2, names_idx, n), sessions_idx) - # _tmp = table_hh2[:, sessions_idx][n].sum() raged_increment[n] = (_sum_table_index(_table_where(table_hh1, names_idx, n), sessions_idx) - _tmp) / _tmp - # raged_increment[n] = (table_hh1[:, sessions_idx][n].sum()-_tmp)/_tmp total = _sum_table_index(table_hh1, sessions_idx) - # total = table_hh1[:, sessions_idx].sum() - return {'ratio': list( - zip(_table_slice(table_hh1, names_idx), map(lambda k: k / total, _table_slice(table_hh1, sessions_idx)))), - 'increase': sorted(raged_increment.items(), key=lambda k: k[1], reverse=True), - 'newEvents': new_names, - } + names, ratio = _table_slice(table_hh1, names_idx), _table_slice(table_hh1, sessions_idx) + ratio = sorted(zip(names, ratio), key=lambda k: k[1], reverse=True) + increase = sorted(raged_increment.items(), key=lambda k: k[1], reverse=True) + names_ = set([k[0] for k in 
increase[:3] + ratio[:3]] + new_names[:3]) + + results = list() + for n in names_: + data_ = {'category': 'rage', 'name': n, 'value': None, 'ratio': None, 'increase': None, 'isNew': True} + for n_, v in ratio: + if n == n_: + data_['value'] = v + data_['ratio'] = v/total + break + for n_, v in increase: + if n == n_: + data_['increase'] = v + data_['isNew'] = False + break + results.append(data_) + return results def fetch_selected(project_id, data: schemas_ee.GetInsightsSchema): - output = {} + output = list() with ch_client.ClickHouseClient() as conn: if schemas_ee.InsightCategories.errors in data.categories: - output[schemas_ee.InsightCategories.errors] = query_most_errors_by_period(project_id=project_id, - start_time=data.startTimestamp, - end_time=data.endTimestamp, - conn=conn) + output += query_most_errors_by_period(project_id=project_id, + start_time=data.startTimestamp, + end_time=data.endTimestamp, + conn=conn) if schemas_ee.InsightCategories.network in data.categories: - output[schemas_ee.InsightCategories.network] = query_requests_by_period(project_id=project_id, - start_time=data.startTimestamp, - end_time=data.endTimestamp, - conn=conn) + output += query_requests_by_period(project_id=project_id, + start_time=data.startTimestamp, + end_time=data.endTimestamp, + conn=conn) if schemas_ee.InsightCategories.rage in data.categories: - output[schemas_ee.InsightCategories.rage] = query_click_rage_by_period(project_id=project_id, - start_time=data.startTimestamp, - end_time=data.endTimestamp, - conn=conn) + output += query_click_rage_by_period(project_id=project_id, + start_time=data.startTimestamp, + end_time=data.endTimestamp, + conn=conn) if schemas_ee.InsightCategories.resources in data.categories: - output[schemas_ee.InsightCategories.resources] = query_cpu_memory_by_period(project_id=project_id, - start_time=data.startTimestamp, - end_time=data.endTimestamp, - conn=conn) + output += query_cpu_memory_by_period(project_id=project_id, + 
start_time=data.startTimestamp, + end_time=data.endTimestamp, + conn=conn) return output - -# if __name__ == '__main__': -# # configs -# start = '2022-04-19' -# end = '2022-04-21' -# projectId = 1307 -# time_step = 'hour' -# -# # Errors widget -# print('Errors example') -# res = query_most_errors_by_period(projectId, start_time=start, end_time=end, time_step=time_step) -# print(res) -# -# # Resources widgets -# print('resources example') -# res = query_cpu_memory_by_period(projectId, start_time=start, end_time=end, time_step=time_step) -# -# # Network widgets -# print('Network example') -# res = query_requests_by_period(projectId, start_time=start, end_time=end, time_step=time_step) -# print(res) From 6085a2bf8d6b7ba77b16fd4cb85d9035d6c96092 Mon Sep 17 00:00:00 2001 From: rjshrjndrn Date: Thu, 12 Jan 2023 16:03:42 +0100 Subject: [PATCH 2/9] ci(helm): setting app namespace as default Signed-off-by: rjshrjndrn --- .github/workflows/api-ee.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/api-ee.yaml b/.github/workflows/api-ee.yaml index f30c1b111..c014f34c5 100644 --- a/.github/workflows/api-ee.yaml +++ b/.github/workflows/api-ee.yaml @@ -110,7 +110,9 @@ jobs: cat /tmp/image_override.yaml # Deploy command - helm template openreplay -n app openreplay -f vars.yaml -f /tmp/image_override.yaml --set ingress-nginx.enabled=false --set skipMigration=true --no-hooks | kubectl apply -n app -f - + kubectl config set-context --namespace=app --current + kubectl config get-contexts + helm template openreplay -n app openreplay -f vars.yaml -f /tmp/image_override.yaml --set ingress-nginx.enabled=false --set skipMigration=true --no-hooks | kubectl apply -f - env: DOCKER_REPO: ${{ secrets.EE_REGISTRY_URL }} # We're not passing -ee flag, because helm will add that. 
From d30d47e286ff4970bf5b5099b510ad9342c8d36b Mon Sep 17 00:00:00 2001 From: MauricioGarciaS <47052044+MauricioGarciaS@users.noreply.github.com> Date: Thu, 12 Jan 2023 17:10:37 +0100 Subject: [PATCH 3/9] Skip null values --- ee/api/chalicelib/core/sessions_insights.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ee/api/chalicelib/core/sessions_insights.py b/ee/api/chalicelib/core/sessions_insights.py index a0b5b8dad..40d11652d 100644 --- a/ee/api/chalicelib/core/sessions_insights.py +++ b/ee/api/chalicelib/core/sessions_insights.py @@ -158,6 +158,8 @@ def query_requests_by_period(project_id, start_time, end_time, conn=None): results = list() for n in names_: + if n is None: + continue data_ = {'category': 'network', 'name': n, 'value': None, 'ratio': None, 'increase': None, 'isNew': True} for n_, v in ratio: if n == n_: @@ -225,6 +227,8 @@ def query_most_errors_by_period(project_id, start_time, end_time, conn=None): results = list() for n in names_: + if n is None: + continue data_ = {'category': 'errors', 'name': n, 'value': None, 'ratio': None, 'increase': None, 'isNew': True} for n_, v in ratio: if n == n_: @@ -339,6 +343,8 @@ def query_click_rage_by_period(project_id, start_time, end_time, conn=None): results = list() for n in names_: + if n is None: + continue data_ = {'category': 'rage', 'name': n, 'value': None, 'ratio': None, 'increase': None, 'isNew': True} for n_, v in ratio: if n == n_: From 1b88bd8dfe69473f47feac9a6a62c93a020793aa Mon Sep 17 00:00:00 2001 From: MauricioGarciaS <47052044+MauricioGarciaS@users.noreply.github.com> Date: Fri, 13 Jan 2023 15:38:34 +0100 Subject: [PATCH 4/9] Changed format of insights resources --- ee/api/chalicelib/core/sessions_insights.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ee/api/chalicelib/core/sessions_insights.py b/ee/api/chalicelib/core/sessions_insights.py index 40d11652d..0365ced1a 100644 --- a/ee/api/chalicelib/core/sessions_insights.py +++ 
b/ee/api/chalicelib/core/sessions_insights.py @@ -279,8 +279,16 @@ def query_cpu_memory_by_period(project_id, start_time, end_time, conn=None): # TODO: what if _tmp=0 ? _tmp = 1 if _tmp == 0 else _tmp return [{'category': 'resources', - 'cpuIncrease': _mean_table_index(table_hh1, cpu_idx) - _mean_table_index(table_hh2, cpu_idx), - 'memoryIncrease': (_mean_table_index(table_hh1, memory_idx) - _tmp) / _tmp}] + 'name': 'cpu', + 'value': None, + 'increase': _mean_table_index(table_hh1, cpu_idx) - _mean_table_index(table_hh2, cpu_idx), + 'isNew': None}, + {'category': 'resources', + 'name': 'memory', + 'value': None, + 'increase': (_mean_table_index(table_hh1, memory_idx) - _tmp) / _tmp, + 'isNew': None} + ] def query_click_rage_by_period(project_id, start_time, end_time, conn=None): From 95e6d1ea9d5ce1f9ecd563582b4759e40d9125e6 Mon Sep 17 00:00:00 2001 From: MauricioGarciaS <47052044+MauricioGarciaS@users.noreply.github.com> Date: Fri, 13 Jan 2023 16:15:39 +0100 Subject: [PATCH 5/9] Changed name "increase" into "change" --- ee/api/chalicelib/core/sessions_insights.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ee/api/chalicelib/core/sessions_insights.py b/ee/api/chalicelib/core/sessions_insights.py index 0365ced1a..aca585068 100644 --- a/ee/api/chalicelib/core/sessions_insights.py +++ b/ee/api/chalicelib/core/sessions_insights.py @@ -160,7 +160,7 @@ def query_requests_by_period(project_id, start_time, end_time, conn=None): for n in names_: if n is None: continue - data_ = {'category': 'network', 'name': n, 'value': None, 'ratio': None, 'increase': None, 'isNew': True} + data_ = {'category': 'network', 'name': n, 'value': None, 'ratio': None, 'change': None, 'isNew': True} for n_, v in ratio: if n == n_: data_['value'] = v @@ -168,7 +168,7 @@ def query_requests_by_period(project_id, start_time, end_time, conn=None): break for n_, v in increase: if n == n_: - data_['increase'] = v + data_['change'] = v data_['isNew'] = False break 
results.append(data_) @@ -229,7 +229,7 @@ def query_most_errors_by_period(project_id, start_time, end_time, conn=None): for n in names_: if n is None: continue - data_ = {'category': 'errors', 'name': n, 'value': None, 'ratio': None, 'increase': None, 'isNew': True} + data_ = {'category': 'errors', 'name': n, 'value': None, 'ratio': None, 'change': None, 'isNew': True} for n_, v in ratio: if n == n_: data_['value'] = v @@ -237,7 +237,7 @@ def query_most_errors_by_period(project_id, start_time, end_time, conn=None): break for n_, v in increase: if n == n_: - data_['increase'] = v + data_['change'] = v data_['isNew'] = False break results.append(data_) @@ -281,12 +281,12 @@ def query_cpu_memory_by_period(project_id, start_time, end_time, conn=None): return [{'category': 'resources', 'name': 'cpu', 'value': None, - 'increase': _mean_table_index(table_hh1, cpu_idx) - _mean_table_index(table_hh2, cpu_idx), + 'change': _mean_table_index(table_hh1, cpu_idx) - _mean_table_index(table_hh2, cpu_idx), 'isNew': None}, {'category': 'resources', 'name': 'memory', 'value': None, - 'increase': (_mean_table_index(table_hh1, memory_idx) - _tmp) / _tmp, + 'change': (_mean_table_index(table_hh1, memory_idx) - _tmp) / _tmp, 'isNew': None} ] @@ -353,7 +353,7 @@ def query_click_rage_by_period(project_id, start_time, end_time, conn=None): for n in names_: if n is None: continue - data_ = {'category': 'rage', 'name': n, 'value': None, 'ratio': None, 'increase': None, 'isNew': True} + data_ = {'category': 'rage', 'name': n, 'value': None, 'ratio': None, 'change': None, 'isNew': True} for n_, v in ratio: if n == n_: data_['value'] = v @@ -361,7 +361,7 @@ def query_click_rage_by_period(project_id, start_time, end_time, conn=None): break for n_, v in increase: if n == n_: - data_['increase'] = v + data_['change'] = v data_['isNew'] = False break results.append(data_) From de9c847092eb3ab20b8b50608d2d7d4b0ce66b9c Mon Sep 17 00:00:00 2001 From: MauricioGarciaS 
<47052044+MauricioGarciaS@users.noreply.github.com> Date: Fri, 13 Jan 2023 16:43:31 +0100 Subject: [PATCH 6/9] Changed increase values into increase porcentage --- ee/api/chalicelib/core/sessions_insights.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ee/api/chalicelib/core/sessions_insights.py b/ee/api/chalicelib/core/sessions_insights.py index aca585068..50f8bcf80 100644 --- a/ee/api/chalicelib/core/sessions_insights.py +++ b/ee/api/chalicelib/core/sessions_insights.py @@ -143,7 +143,10 @@ def query_requests_by_period(project_id, start_time, end_time, conn=None): for n in common_names: d1_tmp = _table_where(table_hh1, source_idx, n) d2_tmp = _table_where(table_hh2, source_idx, n) - delta_duration[n] = _mean_table_index(d1_tmp, duration_idx) - _mean_table_index(d2_tmp, duration_idx) + _duration1 = _mean_table_index(d2_tmp, duration_idx) + if _duration1 == 0: + continue + delta_duration[n] = (_mean_table_index(d1_tmp, duration_idx) - _duration1) / _duration1 delta_success[n] = _mean_table_index(d1_tmp, success_idx) - _mean_table_index(d2_tmp, success_idx) #names_idx = columns.index('names') @@ -154,7 +157,8 @@ def query_requests_by_period(project_id, start_time, end_time, conn=None): increase = sorted(delta_duration.items(), key=lambda k: k[1], reverse=True) ratio = sorted(zip(_tmp2, _tmp), key=lambda k: k[1], reverse=True) - names_ = set([k[0] for k in increase[:3]+ratio[:3]]+new_hosts[:3]) + # names_ = set([k[0] for k in increase[:3]+ratio[:3]]+new_hosts[:3]) + names_ = set([k[0] for k in increase[:3] + ratio[:3]]) # we took out new hosts since they dont give much info results = list() for n in names_: @@ -219,8 +223,10 @@ def query_most_errors_by_period(project_id, start_time, end_time, conn=None): for n in this_period_errors: percentage_errors[n] = _sum_table_index(_table_where(table_hh1, names_idx, n), sessions_idx) for n in common_errors: - error_increase[n] = _sum_table_index(_table_where(table_hh1, names_idx, n), 
names_idx) - _sum_table_index( - _table_where(table_hh2, names_idx, n), names_idx) + errors_ = _sum_table_index(_table_where(table_hh2, names_idx, n), names_idx) + if errors_ == 0: + continue + error_increase[n] = (_sum_table_index(_table_where(table_hh1, names_idx, n), names_idx) - errors_) / errors_ ratio = sorted(percentage_errors.items(), key=lambda k: k[1], reverse=True) increase = sorted(error_increase.items(), key=lambda k: k[1], reverse=True) names_ = set([k[0] for k in increase[:3] + ratio[:3]] + new_errors[:3]) From 2f6610ab3ffc992eca20ed45d3ff04f553fc88f7 Mon Sep 17 00:00:00 2001 From: MauricioGarciaS <47052044+MauricioGarciaS@users.noreply.github.com> Date: Mon, 16 Jan 2023 10:24:52 +0100 Subject: [PATCH 7/9] Changed output format. Added oldValue and current value --- ee/api/chalicelib/core/sessions_insights.py | 27 ++++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/ee/api/chalicelib/core/sessions_insights.py b/ee/api/chalicelib/core/sessions_insights.py index 50f8bcf80..fff082308 100644 --- a/ee/api/chalicelib/core/sessions_insights.py +++ b/ee/api/chalicelib/core/sessions_insights.py @@ -122,9 +122,6 @@ def query_requests_by_period(project_id, start_time, end_time, conn=None): res = conn.execute(query=query) else: query = conn.format(query=query, params=params) - print("--------------------") - print(query) - print("--------------------") res = conn.execute(query=query) table_hh1, table_hh2, columns, this_period_hosts, last_period_hosts = __get_two_values(res, time_index='hh', name_index='source') @@ -164,7 +161,7 @@ def query_requests_by_period(project_id, start_time, end_time, conn=None): for n in names_: if n is None: continue - data_ = {'category': 'network', 'name': n, 'value': None, 'ratio': None, 'change': None, 'isNew': True} + data_ = {'category': 'network', 'name': n, 'value': None, 'oldValue': None, 'ratio': None, 'change': None, 'isNew': True} for n_, v in ratio: if n == n_: data_['value'] = v @@ -235,7 
+232,7 @@ def query_most_errors_by_period(project_id, start_time, end_time, conn=None): for n in names_: if n is None: continue - data_ = {'category': 'errors', 'name': n, 'value': None, 'ratio': None, 'change': None, 'isNew': True} + data_ = {'category': 'errors', 'name': n, 'value': None, 'oldValue': None, 'ratio': None, 'change': None, 'isNew': True} for n_, v in ratio: if n == n_: data_['value'] = v @@ -281,18 +278,24 @@ def query_cpu_memory_by_period(project_id, start_time, end_time, conn=None): memory_idx = columns.index('memory_used') cpu_idx = columns.index('cpu_used') - _tmp = _mean_table_index(table_hh2, memory_idx) + mem_newvalue = _mean_table_index(table_hh1, memory_idx) + mem_oldvalue = _mean_table_index(table_hh2, memory_idx) + cpu_newvalue = _mean_table_index(table_hh2, cpu_idx) + cpu_oldvalue = _mean_table_index(table_hh2, cpu_idx) # TODO: what if _tmp=0 ? - _tmp = 1 if _tmp == 0 else _tmp + mem_oldvalue = 1 if mem_oldvalue == 0 else mem_oldvalue + cpu_oldvalue = 1 if cpu_oldvalue == 0 else cpu_oldvalue return [{'category': 'resources', 'name': 'cpu', - 'value': None, - 'change': _mean_table_index(table_hh1, cpu_idx) - _mean_table_index(table_hh2, cpu_idx), + 'value': cpu_newvalue, + 'oldValue': cpu_oldvalue, + 'change': (cpu_newvalue - cpu_oldvalue)/cpu_oldvalue, 'isNew': None}, {'category': 'resources', 'name': 'memory', - 'value': None, - 'change': (_mean_table_index(table_hh1, memory_idx) - _tmp) / _tmp, + 'value': mem_newvalue, + 'oldValue': mem_oldvalue, + 'change': (mem_newvalue - mem_oldvalue)/mem_oldvalue, 'isNew': None} ] @@ -359,7 +362,7 @@ def query_click_rage_by_period(project_id, start_time, end_time, conn=None): for n in names_: if n is None: continue - data_ = {'category': 'rage', 'name': n, 'value': None, 'ratio': None, 'change': None, 'isNew': True} + data_ = {'category': 'rage', 'name': n, 'value': None, 'oldValue': None, 'ratio': None, 'change': None, 'isNew': True} for n_, v in ratio: if n == n_: data_['value'] = v From 
5e8a7c3a365eca266bb839c496e11a8fad07da4e Mon Sep 17 00:00:00 2001 From: MauricioGarciaS <47052044+MauricioGarciaS@users.noreply.github.com> Date: Mon, 16 Jan 2023 11:31:47 +0100 Subject: [PATCH 8/9] Fixed some issues --- ee/api/chalicelib/core/sessions_insights.py | 88 +++++++++++++-------- 1 file changed, 55 insertions(+), 33 deletions(-) diff --git a/ee/api/chalicelib/core/sessions_insights.py b/ee/api/chalicelib/core/sessions_insights.py index fff082308..ed55ebc2e 100644 --- a/ee/api/chalicelib/core/sessions_insights.py +++ b/ee/api/chalicelib/core/sessions_insights.py @@ -134,25 +134,32 @@ def query_requests_by_period(project_id, start_time, end_time, conn=None): source_idx = columns.index('source') duration_idx = columns.index('avg_duration') - success_idx = columns.index('success_rate') - delta_duration = dict() - delta_success = dict() + # success_idx = columns.index('success_rate') + # delta_duration = dict() + # delta_success = dict() + new_duration_values = dict() + duration_values = dict() for n in common_names: d1_tmp = _table_where(table_hh1, source_idx, n) d2_tmp = _table_where(table_hh2, source_idx, n) - _duration1 = _mean_table_index(d2_tmp, duration_idx) - if _duration1 == 0: + old_duration = _mean_table_index(d2_tmp, duration_idx) + new_duration = _mean_table_index(d1_tmp, duration_idx) + if old_duration == 0: continue - delta_duration[n] = (_mean_table_index(d1_tmp, duration_idx) - _duration1) / _duration1 - delta_success[n] = _mean_table_index(d1_tmp, success_idx) - _mean_table_index(d2_tmp, success_idx) + duration_values[n] = new_duration, old_duration, (new_duration-old_duration)/old_duration + # delta_duration[n] = (_mean_table_index(d1_tmp, duration_idx) - _duration1) / _duration1 + # delta_success[n] = _mean_table_index(d1_tmp, success_idx) - _mean_table_index(d2_tmp, success_idx) + for n in new_hosts: + d1_tmp = _table_where(table_hh1, source_idx, n) + new_duration_values[n] = _mean_table_index(d1_tmp, duration_idx) - #names_idx = 
columns.index('names') + #names_idx = columns.index('names') total = _sum_table_index(table_hh1, duration_idx) d1_tmp = _sort_table_index(table_hh1, duration_idx, reverse=True) _tmp = _table_slice(d1_tmp, duration_idx) _tmp2 = _table_slice(d1_tmp, source_idx) - increase = sorted(delta_duration.items(), key=lambda k: k[1], reverse=True) + increase = sorted(duration_values.items(), key=lambda k: k[1][-1], reverse=True) ratio = sorted(zip(_tmp2, _tmp), key=lambda k: k[1], reverse=True) # names_ = set([k[0] for k in increase[:3]+ratio[:3]]+new_hosts[:3]) names_ = set([k[0] for k in increase[:3] + ratio[:3]]) # we took out new hosts since they dont give much info @@ -164,12 +171,15 @@ def query_requests_by_period(project_id, start_time, end_time, conn=None): data_ = {'category': 'network', 'name': n, 'value': None, 'oldValue': None, 'ratio': None, 'change': None, 'isNew': True} for n_, v in ratio: if n == n_: - data_['value'] = v + if n in new_hosts: + data_['value'] = new_duration_values[n] data_['ratio'] = v/total break for n_, v in increase: if n == n_: - data_['change'] = v + data_['value'] = v[0] + data_['oldValue'] = v[1] + data_['change'] = v[2] data_['isNew'] = False break results.append(data_) @@ -206,7 +216,6 @@ def query_most_errors_by_period(project_id, start_time, end_time, conn=None): table_hh1, table_hh2, columns, this_period_errors, last_period_errors = __get_two_values(res, time_index='hh', name_index='names') - print(f'res {res}') del res new_errors = [x for x in this_period_errors if x not in last_period_errors] @@ -216,16 +225,21 @@ def query_most_errors_by_period(project_id, start_time, end_time, conn=None): names_idx = columns.index('names') percentage_errors = dict() total = _sum_table_index(table_hh1, sessions_idx) - error_increase = dict() + # error_increase = dict() + new_error_values = dict() + error_values = dict() for n in this_period_errors: percentage_errors[n] = _sum_table_index(_table_where(table_hh1, names_idx, n), sessions_idx) + 
new_error_values[n] = _sum_table_index(_table_where(table_hh1, names_idx, n), names_idx) for n in common_errors: - errors_ = _sum_table_index(_table_where(table_hh2, names_idx, n), names_idx) - if errors_ == 0: + old_errors = _sum_table_index(_table_where(table_hh2, names_idx, n), names_idx) + if old_errors == 0: continue - error_increase[n] = (_sum_table_index(_table_where(table_hh1, names_idx, n), names_idx) - errors_) / errors_ + new_errors = _sum_table_index(_table_where(table_hh1, names_idx, n), names_idx) + # error_increase[n] = (new_errors - old_errors) / old_errors + error_values[n] = new_errors, old_errors, (new_errors - old_errors) / old_errors ratio = sorted(percentage_errors.items(), key=lambda k: k[1], reverse=True) - increase = sorted(error_increase.items(), key=lambda k: k[1], reverse=True) + increase = sorted(error_values.items(), key=lambda k: k[1][-1], reverse=True) names_ = set([k[0] for k in increase[:3] + ratio[:3]] + new_errors[:3]) results = list() @@ -235,12 +249,15 @@ def query_most_errors_by_period(project_id, start_time, end_time, conn=None): data_ = {'category': 'errors', 'name': n, 'value': None, 'oldValue': None, 'ratio': None, 'change': None, 'isNew': True} for n_, v in ratio: if n == n_: - data_['value'] = v + if n in new_errors: + data_['value'] = new_error_values[n] data_['ratio'] = v/total break for n_, v in increase: if n == n_: - data_['change'] = v + data_['value'] = v[0] + data_['oldValue'] = v[1] + data_['change'] = v[2] data_['isNew'] = False break results.append(data_) @@ -321,15 +338,9 @@ def query_click_rage_by_period(project_id, start_time, end_time, conn=None): if conn is None: with ch_client.ClickHouseClient() as conn: query = conn.format(query=query, params=params) - print("--------------------") - print(query) - print("--------------------") res = conn.execute(query=query) else: query = conn.format(query=query, params=params) - print("--------------------") - print(query) - print("--------------------") res = 
conn.execute(query=query) table_hh1, table_hh2, columns, this_period_rage, last_period_rage = __get_two_values(res, time_index='hh', @@ -338,24 +349,32 @@ def query_click_rage_by_period(project_id, start_time, end_time, conn=None): new_names = [x for x in this_period_rage if x not in last_period_rage] common_names = [x for x in this_period_rage if x not in new_names] - print(f'[res...] {new_names}\n') - print(f'[common...] {common_names}\n') sessions_idx = columns.index('sessions') names_idx = columns.index('sources') - raged_increment = dict() + # raged_increment = dict() + raged_values = dict() + new_raged_values = dict() # TODO verify line (188) _tmp = table_hh2[:, sessions_idx][n].sum() for n in common_names: if n is None: continue - _tmp = _sum_table_index(_table_where(table_hh2, names_idx, n), sessions_idx) - raged_increment[n] = (_sum_table_index(_table_where(table_hh1, names_idx, n), sessions_idx) - _tmp) / _tmp + _oldvalue = _sum_table_index(_table_where(table_hh2, names_idx, n), sessions_idx) + _newvalue = _sum_table_index(_table_where(table_hh1, names_idx, n), sessions_idx) + # raged_increment[n] = (_newvalue - _oldvalue) / _oldvalue + raged_values[n] = _newvalue, _oldvalue, (_newvalue - _oldvalue) / _oldvalue + + for n in new_names: + if n is None: + continue + _newvalue = _sum_table_index(_table_where(table_hh1, names_idx, n), sessions_idx) + new_raged_values[n] = _newvalue total = _sum_table_index(table_hh1, sessions_idx) names, ratio = _table_slice(table_hh1, names_idx), _table_slice(table_hh1, sessions_idx) ratio = sorted(zip(names, ratio), key=lambda k: k[1], reverse=True) - increase = sorted(raged_increment.items(), key=lambda k: k[1], reverse=True) + increase = sorted(raged_values.items(), key=lambda k: k[1][-1], reverse=True) names_ = set([k[0] for k in increase[:3] + ratio[:3]] + new_names[:3]) results = list() @@ -365,12 +384,15 @@ def query_click_rage_by_period(project_id, start_time, end_time, conn=None): data_ = {'category': 'rage', 'name': 
n, 'value': None, 'oldValue': None, 'ratio': None, 'change': None, 'isNew': True} for n_, v in ratio: if n == n_: - data_['value'] = v + if n in new_names: + data_['value'] = new_raged_values[n] data_['ratio'] = v/total break for n_, v in increase: if n == n_: - data_['change'] = v + data_['value'] = v[0] + data_['oldValue'] = v[1] + data_['change'] = v[2] data_['isNew'] = False break results.append(data_) From d81d849cb9dfd17aa5c1770847455ce43042e248 Mon Sep 17 00:00:00 2001 From: MauricioGarciaS <47052044+MauricioGarciaS@users.noreply.github.com> Date: Wed, 18 Jan 2023 20:11:06 +0100 Subject: [PATCH 9/9] Changed GetInsightsSchema attributes. Missing the filter handler. --- ee/api/chalicelib/core/custom_metrics.py | 2 +- ee/api/chalicelib/core/sessions_insights.py | 19 +++++++++++-------- ee/api/schemas_ee.py | 3 ++- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/ee/api/chalicelib/core/custom_metrics.py b/ee/api/chalicelib/core/custom_metrics.py index 3f9d3aaf7..f38f12a06 100644 --- a/ee/api/chalicelib/core/custom_metrics.py +++ b/ee/api/chalicelib/core/custom_metrics.py @@ -137,7 +137,7 @@ def __get_insights_chat(project_id, user_id, data: schemas_ee.CreateCardSchema): return sessions_insights.fetch_selected(project_id=project_id, data=schemas_ee.GetInsightsSchema(startTimestamp=data.startTimestamp, endTimestamp=data.endTimestamp, - categories=data.metric_value)) + metricValue=data.metric_value)) def merged_live(project_id, data: schemas_ee.CreateCardSchema, user_id=None): diff --git a/ee/api/chalicelib/core/sessions_insights.py b/ee/api/chalicelib/core/sessions_insights.py index ed55ebc2e..76f1927d6 100644 --- a/ee/api/chalicelib/core/sessions_insights.py +++ b/ee/api/chalicelib/core/sessions_insights.py @@ -1,4 +1,5 @@ -import schemas_ee +import schemas, schemas_ee +from typing import List from chalicelib.core import metrics from chalicelib.utils import ch_client @@ -401,27 +402,29 @@ def query_click_rage_by_period(project_id, start_time,
end_time, conn=None): def fetch_selected(project_id, data: schemas_ee.GetInsightsSchema): output = list() - if data.categories is None or len(data.categories) == 0: - data.categories = [] + #TODO: Handle filters of GetInsightsSchema + # data.series[0].filter.filters + if data.metricValue is None or len(data.metricValue) == 0: + data.metricValue = [] for v in schemas_ee.InsightCategories: - data.categories.append(v) + data.metricValue.append(v) with ch_client.ClickHouseClient() as conn: - if schemas_ee.InsightCategories.errors in data.categories: + if schemas_ee.InsightCategories.errors in data.metricValue: output += query_most_errors_by_period(project_id=project_id, start_time=data.startTimestamp, end_time=data.endTimestamp, conn=conn) - if schemas_ee.InsightCategories.network in data.categories: + if schemas_ee.InsightCategories.network in data.metricValue: output += query_requests_by_period(project_id=project_id, start_time=data.startTimestamp, end_time=data.endTimestamp, conn=conn) - if schemas_ee.InsightCategories.rage in data.categories: + if schemas_ee.InsightCategories.rage in data.metricValue: output += query_click_rage_by_period(project_id=project_id, start_time=data.startTimestamp, end_time=data.endTimestamp, conn=conn) - if schemas_ee.InsightCategories.resources in data.categories: + if schemas_ee.InsightCategories.resources in data.metricValue: output += query_cpu_memory_by_period(project_id=project_id, start_time=data.startTimestamp, end_time=data.endTimestamp, diff --git a/ee/api/schemas_ee.py b/ee/api/schemas_ee.py index ed785dcb2..9d70ae2d9 100644 --- a/ee/api/schemas_ee.py +++ b/ee/api/schemas_ee.py @@ -51,7 +51,8 @@ class GetInsightsSchema(BaseModel): startTimestamp: int = Field(TimeUTC.now(-7)) endTimestamp: int = Field(TimeUTC.now()) # time_step: int = Field(default=3600) - categories: List[InsightCategories] = Field(...) + metricValue: List[InsightCategories] = Field(...) 
+ series: List[schemas.CardCreateSeriesSchema] = Field([...]) class Config: alias_generator = schemas.attribute_to_camel_case