368 lines
14 KiB
Python
368 lines
14 KiB
Python
import pandas as pd
|
|
from db.models import DetailedEvent, Event, Session, DATABASE
|
|
|
|
# Target pandas dtype for each column of the "normal" events DataFrame.
# get_df_from_batch() passes this mapping to DataFrame.astype() when
# level == 'normal'.  "Int64" is pandas' nullable integer dtype and "string"
# is pandas' dedicated text dtype.
dtypes_events = {
    "sessionid": "Int64",
    "connectioninformation_downlink": "Int64",
    "connectioninformation_type": "string",
    "consolelog_level": "string",
    "consolelog_value": "string",
    "customevent_messageid": "Int64",
    "customevent_name": "string",
    "customevent_payload": "string",
    "customevent_timestamp": "Int64",
    "errorevent_message": "string",
    "errorevent_messageid": "Int64",
    "errorevent_name": "string",
    "errorevent_payload": "string",
    "errorevent_source": "string",
    "errorevent_timestamp": "Int64",
    "jsexception_message": "string",
    "jsexception_name": "string",
    "jsexception_payload": "string",
    "metadata_key": "string",
    "metadata_value": "string",
    "mouseclick_id": "Int64",
    "mouseclick_hesitationtime": "Int64",
    "mouseclick_label": "string",
    "pageevent_firstcontentfulpaint": "Int64",
    "pageevent_firstpaint": "Int64",
    "pageevent_messageid": "Int64",
    "pageevent_referrer": "string",
    "pageevent_speedindex": "Int64",
    "pageevent_timestamp": "Int64",
    "pageevent_url": "string",
    "pagerendertiming_timetointeractive": "Int64",
    "pagerendertiming_visuallycomplete": "Int64",
    "rawcustomevent_name": "string",
    "rawcustomevent_payload": "string",
    "setviewportsize_height": "Int64",
    "setviewportsize_width": "Int64",
    "timestamp_timestamp": "Int64",
    "user_anonymous_id": "string",
    "user_id": "string",
    "issueevent_messageid": "Int64",
    "issueevent_timestamp": "Int64",
    "issueevent_type": "string",
    "issueevent_contextstring": "string",
    "issueevent_context": "string",
    "issueevent_payload": "string",
    "customissue_name": "string",
    "customissue_payload": "string",
    "received_at": "Int64",
    "batch_order_number": "Int64",
}
|
|
# Target pandas dtype for each column of the "detailed" events DataFrame.
# get_df_from_batch() passes this mapping to DataFrame.astype() when
# level == 'detailed'.  Text columns here are plain "object" (not the pandas
# "string" dtype), so the string clean-up loop in get_df_from_batch(), which
# only acts on "string" columns, leaves them untouched.
dtypes_detailed_events = {
    "sessionid": "Int64",
    "clickevent_hesitationtime": "Int64",
    "clickevent_label": "object",
    "clickevent_messageid": "Int64",
    "clickevent_timestamp": "Int64",
    "connectioninformation_downlink": "Int64",
    "connectioninformation_type": "object",
    "consolelog_level": "object",
    "consolelog_value": "object",
    "cpuissue_duration": "Int64",
    "cpuissue_rate": "Int64",
    "cpuissue_timestamp": "Int64",
    "createdocument": "boolean",
    "createelementnode_id": "Int64",
    "createelementnode_parentid": "Int64",
    "cssdeleterule_index": "Int64",
    "cssdeleterule_stylesheetid": "Int64",
    "cssinsertrule_index": "Int64",
    "cssinsertrule_rule": "object",
    "cssinsertrule_stylesheetid": "Int64",
    "customevent_messageid": "Int64",
    "customevent_name": "object",
    "customevent_payload": "object",
    "customevent_timestamp": "Int64",
    "domdrop_timestamp": "Int64",
    "errorevent_message": "object",
    "errorevent_messageid": "Int64",
    "errorevent_name": "object",
    "errorevent_payload": "object",
    "errorevent_source": "object",
    "errorevent_timestamp": "Int64",
    "fetch_duration": "Int64",
    "fetch_method": "object",
    "fetch_request": "object",
    "fetch_response": "object",
    "fetch_status": "Int64",
    "fetch_timestamp": "Int64",
    "fetch_url": "object",
    "graphql_operationkind": "object",
    "graphql_operationname": "object",
    "graphql_response": "object",
    "graphql_variables": "object",
    "graphqlevent_messageid": "Int64",
    "graphqlevent_name": "object",
    "graphqlevent_timestamp": "Int64",
    "inputevent_label": "object",
    "inputevent_messageid": "Int64",
    "inputevent_timestamp": "Int64",
    "inputevent_value": "object",
    "inputevent_valuemasked": "boolean",
    "jsexception_message": "object",
    "jsexception_name": "object",
    "jsexception_payload": "object",
    "longtasks_timestamp": "Int64",
    "longtasks_duration": "Int64",
    "longtasks_containerid": "object",
    "longtasks_containersrc": "object",
    "memoryissue_duration": "Int64",
    "memoryissue_rate": "Int64",
    "memoryissue_timestamp": "Int64",
    "metadata_key": "object",
    "metadata_value": "object",
    "mobx_payload": "object",
    "mobx_type": "object",
    "mouseclick_id": "Int64",
    "mouseclick_hesitationtime": "Int64",
    "mouseclick_label": "object",
    "mousemove_x": "Int64",
    "mousemove_y": "Int64",
    "movenode_id": "Int64",
    "movenode_index": "Int64",
    "movenode_parentid": "Int64",
    "ngrx_action": "object",
    "ngrx_duration": "Int64",
    "ngrx_state": "object",
    "otable_key": "object",
    "otable_value": "object",
    "pageevent_domcontentloadedeventend": "Int64",
    "pageevent_domcontentloadedeventstart": "Int64",
    "pageevent_firstcontentfulpaint": "Int64",
    "pageevent_firstpaint": "Int64",
    "pageevent_loaded": "boolean",
    "pageevent_loadeventend": "Int64",
    "pageevent_loadeventstart": "Int64",
    "pageevent_messageid": "Int64",
    "pageevent_referrer": "object",
    "pageevent_requeststart": "Int64",
    "pageevent_responseend": "Int64",
    "pageevent_responsestart": "Int64",
    "pageevent_speedindex": "Int64",
    "pageevent_timestamp": "Int64",
    "pageevent_url": "object",
    "pageloadtiming_domcontentloadedeventend": "Int64",
    "pageloadtiming_domcontentloadedeventstart": "Int64",
    "pageloadtiming_firstcontentfulpaint": "Int64",
    "pageloadtiming_firstpaint": "Int64",
    "pageloadtiming_loadeventend": "Int64",
    "pageloadtiming_loadeventstart": "Int64",
    "pageloadtiming_requeststart": "Int64",
    "pageloadtiming_responseend": "Int64",
    "pageloadtiming_responsestart": "Int64",
    "pagerendertiming_speedindex": "Int64",
    "pagerendertiming_timetointeractive": "Int64",
    "pagerendertiming_visuallycomplete": "Int64",
    "performancetrack_frames": "Int64",
    "performancetrack_ticks": "Int64",
    "performancetrack_totaljsheapsize": "Int64",
    "performancetrack_usedjsheapsize": "Int64",
    "performancetrackaggr_avgcpu": "Int64",
    "performancetrackaggr_avgfps": "Int64",
    "performancetrackaggr_avgtotaljsheapsize": "Int64",
    "performancetrackaggr_avgusedjsheapsize": "Int64",
    "performancetrackaggr_maxcpu": "Int64",
    "performancetrackaggr_maxfps": "Int64",
    "performancetrackaggr_maxtotaljsheapsize": "Int64",
    "performancetrackaggr_maxusedjsheapsize": "Int64",
    "performancetrackaggr_mincpu": "Int64",
    "performancetrackaggr_minfps": "Int64",
    "performancetrackaggr_mintotaljsheapsize": "Int64",
    "performancetrackaggr_minusedjsheapsize": "Int64",
    "performancetrackaggr_timestampend": "Int64",
    "performancetrackaggr_timestampstart": "Int64",
    "profiler_args": "object",
    "profiler_duration": "Int64",
    "profiler_name": "object",
    "profiler_result": "object",
    "rawcustomevent_name": "object",
    "rawcustomevent_payload": "object",
    "rawerrorevent_message": "object",
    "rawerrorevent_name": "object",
    "rawerrorevent_payload": "object",
    "rawerrorevent_source": "object",
    "rawerrorevent_timestamp": "Int64",
    "redux_action": "object",
    "redux_duration": "Int64",
    "redux_state": "object",
    "removenode_id": "Int64",
    "removenodeattribute_id": "Int64",
    "removenodeattribute_name": "object",
    "resourceevent_decodedbodysize": "Int64",
    "resourceevent_duration": "Int64",
    "resourceevent_encodedbodysize": "Int64",
    "resourceevent_headersize": "Int64",
    "resourceevent_messageid": "Int64",
    "resourceevent_method": "object",
    "resourceevent_status": "Int64",
    "resourceevent_success": "boolean",
    "resourceevent_timestamp": "Int64",
    "resourceevent_ttfb": "Int64",
    "resourceevent_type": "object",
    "resourceevent_url": "object",
    "resourcetiming_decodedbodysize": "Int64",
    "resourcetiming_duration": "Int64",
    "resourcetiming_encodedbodysize": "Int64",
    "resourcetiming_headersize": "Int64",
    "resourcetiming_initiator": "object",
    "resourcetiming_timestamp": "Int64",
    "resourcetiming_ttfb": "Int64",
    "resourcetiming_url": "object",
    "sessiondisconnect": "boolean",
    "sessiondisconnect_timestamp": "Int64",
    "sessionend": "boolean",
    "sessionend_timestamp": "Int64",
    "sessionstart_projectid": "Int64",
    "sessionstart_revid": "object",
    "sessionstart_timestamp": "Int64",
    "sessionstart_trackerversion": "object",
    "sessionstart_useragent": "object",
    "sessionstart_userbrowser": "object",
    "sessionstart_userbrowserversion": "object",
    "sessionstart_usercountry": "object",
    "sessionstart_userdevice": "object",
    "sessionstart_userdeviceheapsize": "Int64",
    "sessionstart_userdevicememorysize": "Int64",
    "sessionstart_userdevicetype": "object",
    "sessionstart_useros": "object",
    "sessionstart_userosversion": "object",
    "sessionstart_useruuid": "object",
    # NOTE(review): several set* columns below have text/flag-sounding names
    # (*_data, *_label, *_name, *_value, *_checked, *_mask) but are typed
    # "Int64".  Left as-is; confirm against the DetailedEvent model schema.
    "setcssdata_data": "Int64",
    "setcssdata_id": "Int64",
    "setinputchecked_checked": "Int64",
    "setinputchecked_id": "Int64",
    "setinputtarget_id": "Int64",
    "setinputtarget_label": "Int64",
    "setinputvalue_id": "Int64",
    "setinputvalue_mask": "Int64",
    "setinputvalue_value": "Int64",
    "setnodeattribute_id": "Int64",
    "setnodeattribute_name": "Int64",
    "setnodeattribute_value": "Int64",
    "setnodedata_data": "Int64",
    "setnodedata_id": "Int64",
    "setnodescroll_id": "Int64",
    "setnodescroll_x": "Int64",
    "setnodescroll_y": "Int64",
    "setpagelocation_navigationstart": "Int64",
    "setpagelocation_referrer": "object",
    "setpagelocation_url": "object",
    "setpagevisibility_hidden": "boolean",
    "setviewportscroll_x": "Int64",
    "setviewportscroll_y": "Int64",
    "setviewportsize_height": "Int64",
    "setviewportsize_width": "Int64",
    "stateaction_type": "object",
    "stateactionevent_messageid": "Int64",
    "stateactionevent_timestamp": "Int64",
    "stateactionevent_type": "object",
    "timestamp_timestamp": "Int64",
    "useranonymousid_id": "object",
    "userid_id": "object",
    "vuex_mutation": "object",
    # NOTE(review): the only "string"-typed entry in this mapping, and so the
    # only detailed column the clean-up loop in get_df_from_batch() will
    # truncate -- confirm whether that is intentional.
    "vuex_state": "string",
    "received_at": "Int64",
    "batch_order_number": "Int64",
}
|
|
# Target pandas dtype for each column of the sessions DataFrame.
# get_df_from_batch() passes this mapping to DataFrame.astype() when
# level == 'sessions'.  'issues' and 'urls' default to "object".
dtypes_sessions = {
    "sessionid": "Int64",
    "user_agent": "string",
    "user_browser": "string",
    "user_browser_version": "string",
    "user_country": "string",
    "user_device": "string",
    "user_device_heap_size": "Int64",
    "user_device_memory_size": "Int64",
    "user_device_type": "string",
    "user_os": "string",
    "user_os_version": "string",
    "user_uuid": "string",
    "connection_effective_bandwidth": "Int64",
    "connection_type": "string",
    "metadata_key": "string",
    "metadata_value": "string",
    "referrer": "string",
    "user_anonymous_id": "string",
    "user_id": "string",
    "session_start_timestamp": "Int64",
    "session_end_timestamp": "Int64",
    "session_duration": "Int64",
    "first_contentful_paint": "Int64",
    "speed_index": "Int64",
    "visually_complete": "Int64",
    "timing_time_to_interactive": "Int64",
    "avg_cpu": "Int64",
    "avg_fps": "Int64",
    "max_cpu": "Int64",
    "max_fps": "Int64",
    "max_total_js_heap_size": "Int64",
    "max_used_js_heap_size": "Int64",
    "js_exceptions_count": "Int64",
    "long_tasks_total_duration": "Int64",
    "long_tasks_max_duration": "Int64",
    "long_tasks_count": "Int64",
    "inputs_count": "Int64",
    "clicks_count": "Int64",
    "issues_count": "Int64",
    "issues": "object",
    "urls_count": "Int64",
    "urls": "object",
}
|
|
|
|
# When the configured backend is BigQuery, 'urls' and 'issues' are cast to the
# pandas "string" dtype instead of "object".  Because get_df_from_batch() only
# truncates / de-pipes "string" columns, that clean-up then applies to these
# two columns as well.
if DATABASE == 'bigquery':
    dtypes_sessions.update({'urls': 'string', 'issues': 'string'})
|
|
|
|
# Column-name lists for each model: every name defined directly on the class
# (class __dict__, so inherited attributes are not included) that does not
# start with an underscore.  get_df_from_batch() uses them as the `columns=`
# of the DataFrame it builds.
# NOTE(review): any public non-column attribute or method defined on these
# classes would also be picked up -- confirm the models only expose columns.
detailed_events_col = [
    name for name in DetailedEvent.__dict__ if not name.startswith('_')
]

events_col = [name for name in Event.__dict__ if not name.startswith('_')]

sessions_col = [name for name in Session.__dict__ if not name.startswith('_')]
|
|
|
|
|
|
def _sanitize_string_columns(df, max_length=255):
    """Truncate and de-pipe every pandas ``string``-dtype column of *df* in place.

    Only columns whose dtype is a ``pd.StringDtype`` are processed; ``object``,
    integer and boolean columns are left untouched.  Each value is cut to
    *max_length* characters and then every literal ``|`` is removed.
    """
    for name in df.columns:
        if isinstance(df[name].dtype, pd.StringDtype):
            shortened = df[name].str.slice(0, max_length)
            # regex=False: "|" must be removed as a literal character, not
            # interpreted as regex alternation, regardless of pandas version.
            df[name] = shortened.str.replace("|", "", regex=False)


def get_df_from_batch(batch, level):
    """Build a typed, sanitised DataFrame from a batch of model objects.

    Args:
        batch: iterable of objects whose ``__dict__`` supplies the row values
            (presumably instances of the db.models class matching *level* --
            confirm against callers).
        level: ``'normal'``, ``'detailed'`` or ``'sessions'``; selects both
            the column list and the dtype mapping applied to the frame.

    Returns:
        A ``pandas.DataFrame`` restricted to the level's columns, cast to the
        level's dtypes, with ``string`` columns truncated to 255 characters
        and stripped of ``|`` characters.

    Raises:
        ValueError: if *level* is not one of the supported values (this used
            to surface as an ``UnboundLocalError`` on the first use of ``df``).
    """
    if level == 'normal':
        columns, dtypes = events_col, dtypes_events
    elif level == 'detailed':
        columns, dtypes = detailed_events_col, dtypes_detailed_events
    elif level == 'sessions':
        columns, dtypes = sessions_col, dtypes_sessions
    else:
        raise ValueError(
            f"Unsupported level {level!r}; expected 'normal', 'detailed' or 'sessions'"
        )

    df = pd.DataFrame([b.__dict__ for b in batch], columns=columns)
    # The ORM bookkeeping attribute is not a data column; drop it if it ever
    # makes it into the frame.
    df = df.drop(columns='_sa_instance_state', errors='ignore')

    if level == 'detailed':
        # These two columns are blanked before the cast, so their values are
        # never exported at the detailed level.
        df['inputevent_value'] = None
        df['customevent_payload'] = None

    df = df.astype(dtypes)

    if DATABASE == 'clickhouse' and level == 'sessions':
        # Replace missing values with empty strings for the ClickHouse backend.
        df['issues'] = df['issues'].fillna('')
        df['urls'] = df['urls'].fillna('')

    _sanitize_string_columns(df)
    return df
|