Redshift connector (#1170)
* Updated dependencies for the Redshift connector; switched from the os module to python-decouple for configuration (see the sketch after the commit metadata below)
* Updated service and images
* Updated message protocol; added an exception for BatchMetadata when version is 0 (the old read method is applied)
* Fixed load error from S3 to Redshift: null values for string columns are now empty strings ("")
* Added test file consumer_async.py: reads the Kafka raw topic every 3 minutes and sends a background task to upload to the cloud
* Added a method to skip messages that were not inserted into the cloud
* Added logs to consumer_async. Changed urls and issues in the sessions table from list to string
* Split messages between the sessions table and the events table
* Updated redshift tables
* Fixed a small issue in the redshift_sessions.sql query
* Updated Dockerfiles. Cleaned up consumer_async logs. Updated/fixed tables. Transformed NaN to NULL for VARCHAR columns
* Added an error handler for dropped SQL connections
* chore(docker): Optimize docker builds
Signed-off-by: rjshrjndrn <rjshrjndrn@gmail.com>
* Variables renamed
* Adding compression libraries
* Set the default value of event counts to 0 (instead of NULL) when the event did not occur
* Added support for tracking specific projects. Added a PG handler to connect to the sessions table
* Added a method to update values over the DB connection for sessions that ended and restarted
* Removing intelligent file copying
* chore(connector): Build file
Signed-off-by: rjshrjndrn <rjshrjndrn@gmail.com>
* Adding connection pool for pg
* Renaming and optimizing
* Fixed issue of missing session information
---------
Signed-off-by: rjshrjndrn <rjshrjndrn@gmail.com>
Co-authored-by: rjshrjndrn <rjshrjndrn@gmail.com>
This commit is contained in: parent 28ab64595f, commit 28182b951e
25 changed files with 1363 additions and 1217 deletions
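A minimal sketch of the os.environ -> python-decouple pattern that recurs throughout this diff. Values come from the environment or a .env file; the names and calls below mirror lines in the hunks that follow:

    # Sketch of the configuration pattern used across this PR (python-decouple).
    from decouple import config

    DATABASE = config('DATABASE_NAME')                                # required: raises if unset
    LEVEL = config('LEVEL')
    batch_size = config('events_batch_size', default=4000, cast=int)  # optional, typed default
    ssl_enabled = config('SSL_ENABLED', default=True, cast=bool)      # casts "true"/"false"/"1"/"0"

Unlike os.environ['KEY'], a missing optional key falls back to its default instead of raising KeyError.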
ee/connectors/Readme.md (new file, +3)
@@ -0,0 +1,3 @@
## Build

docker build -f deploy/Dockerfile_redshift -t {tag} .
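For reference, {tag} is a placeholder for the image name; a concrete (hypothetical) invocation would be:

    docker build -f deploy/Dockerfile_redshift -t connector-redshift:latest .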
@@ -1,19 +1,28 @@
import os
from decouple import config
from confluent_kafka import Consumer
from datetime import datetime
from collections import defaultdict
import json
from time import time

#from msgcodec.codec import MessageCodec
from msgcodec.msgcodec import MessageCodec
from msgcodec.messages import SessionEnd
print('[INFO] Importing DBConnection...')
from db.api import DBConnection
print('[INFO] Importing from models..')
from db.models import events_detailed_table_name, events_table_name, sessions_table_name
print('[INFO] Importing from writer..')
from db.writer import insert_batch
print('[INFO] Importing from handler..')
from handler import handle_message, handle_normal_message, handle_session

DATABASE = os.environ['DATABASE_NAME']
LEVEL = os.environ['level']
DATABASE = config('DATABASE_NAME')
LEVEL = config('LEVEL')

print(f'[INFO] Connecting to database {DATABASE}')
db = DBConnection(DATABASE)
print('Connected successfully')

if LEVEL == 'detailed':
    table_name = events_detailed_table_name
@@ -22,30 +31,34 @@ elif LEVEL == 'normal':


def main():
    batch_size = 4000
    sessions_batch_size = 400
    batch_size = config('events_batch_size', default=4000, cast=int)
    sessions_batch_size = config('sessions_batch_size', default=400, cast=int)
    batch = []
    sessions = defaultdict(lambda: None)
    sessions_batch = []

    codec = MessageCodec()
    consumer = Consumer({
        "security.protocol": "SSL",
        "bootstrap.servers": ",".join([os.environ['KAFKA_SERVER_1'],
                                       os.environ['KAFKA_SERVER_2']]),
    ssl_protocol = config('SSL_ENABLED', default=True, cast=bool)
    consumer_settings = {
        "bootstrap.servers": config('KAFKA_SERVER'),
        "group.id": f"connector_{DATABASE}",
        "auto.offset.reset": "earliest",
        "enable.auto.commit": False
    })
    }
    if ssl_protocol:
        consumer_settings['security.protocol'] = 'SSL'
    consumer = Consumer(consumer_settings)

    consumer.subscribe(["raw", "raw_ios"])
    consumer.subscribe([config("topic", default="saas-raw")])
    print("Kafka consumer subscribed")
    t_ = time()
    while True:
        msg.consumer.poll(1.0)
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        messages = codec.decode_detailed(msg.value)
        session_id = codec.decode_key(msg.key)
        #value = json.loads(msg.value().decode('utf-8'))
        messages = codec.decode_detailed(msg.value())
        session_id = codec.decode_key(msg.key())
        if messages is None:
            print('-')
            continue
@@ -68,7 +81,11 @@ def main():

        # try to insert sessions
        if len(sessions_batch) >= sessions_batch_size:
            t2 = time()
            attempt_session_insert(sessions_batch)
            t2_ = time()
            print(f'[INFO] Inserted sessions into Redshift - time spent: {t2_-t2}')
            t_ += t2_-t2
            for s in sessions_batch:
                try:
                    del sessions[s.sessionid]
@@ -86,10 +103,15 @@ def main():

        # insert a batch of events
        if len(batch) >= batch_size:
            t1 = time()
            print(f'[INFO] Spent time filling ({batch_size})-batch: {t1-t_}')
            attempt_batch_insert(batch)
            t1_ = time()
            t_ = t1_
            print(f'[INFO] Inserted events into Redshift - time spent: {t1_-t1}')
            batch = []
            consumer.commit()
        print("sessions in cache:", len(sessions))
        print("[INFO] sessions in cache:", len(sessions))


def attempt_session_insert(sess_batch):
@@ -134,4 +156,6 @@ def decode_key(b) -> int:
    return decoded

if __name__ == '__main__':
    print('[INFO] Setup complete')
    print('[INFO] Starting script')
    main()
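Worth noting in the loop above: enable.auto.commit is off and consumer.commit() only runs after a successful batch insert, which gives at-least-once delivery into the warehouse. A stripped-down sketch of that control flow, with a hypothetical flush() standing in for attempt_batch_insert():

    # Sketch of the manual-commit batching loop in main() above (not the literal code).
    # `consumer` is a confluent_kafka.Consumer created with "enable.auto.commit": False.
    def run(consumer, flush, batch_size=4000):
        batch = []
        while True:
            msg = consumer.poll(1.0)
            if msg is None:
                continue
            batch.append(msg.value())
            if len(batch) >= batch_size:
                flush(batch)           # write to the warehouse first...
                consumer.commit()      # ...then commit offsets: replays on failure, never loses data
                batch = []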
ee/connectors/consumer_async.py (new file, +229)
@@ -0,0 +1,229 @@
from numpy._typing import _16Bit
from decouple import config, Csv
from confluent_kafka import Consumer
from datetime import datetime
from collections import defaultdict
import json
import asyncio
from time import time, sleep
from copy import deepcopy

from msgcodec.msgcodec import MessageCodec
from msgcodec.messages import SessionStart, SessionEnd
from db.api import DBConnection
from db.models import events_detailed_table_name, events_table_name, sessions_table_name
from db.writer import insert_batch, update_batch
from handler import handle_message, handle_normal_message, handle_session
from utils.cache import ProjectFilter as PF
from utils import pg_client

from psycopg2 import InterfaceError

def process_message(msg, codec, sessions, batch, sessions_batch, interesting_sessions, interesting_events, EVENT_TYPE, projectFilter):
    if msg is None:
        return
    messages = codec.decode_detailed(msg.value())
    session_id = codec.decode_key(msg.key())
    if messages is None:
        print('-')
        return
    elif not projectFilter.is_valid(session_id):
        # We check using projectFilter if session_id is from the selected projects
        return

    for message in messages:
        if message.__id__ in interesting_events:
            if EVENT_TYPE == 'detailed':
                n = handle_message(message)
            elif EVENT_TYPE == 'normal':
                n = handle_normal_message(message)
        if message.__id__ in interesting_sessions:

            # Here we create the session if not exists or append message event if session exists
            sessions[session_id] = handle_session(sessions[session_id], message)
            if sessions[session_id]:
                sessions[session_id].sessionid = session_id
                projectFilter.cached_sessions.add(session_id)

                if isinstance(message, SessionEnd):
                    # Here only if session exists and we get sessionend we start cleanup
                    if sessions[session_id].session_start_timestamp:
                        projectFilter.handle_clean()
                        old_status = projectFilter.cached_sessions.close(session_id)
                        sessions_batch.append((old_status, deepcopy(sessions[session_id])))
                        sessions_to_delete = projectFilter.cached_sessions.clear_sessions()
                        for sess_id in sessions_to_delete:
                            try:
                                del sessions[sess_id]
                            except KeyError:
                                print('[INFO] Session already deleted')
                    else:
                        print('[WARN] Session not started received SessionEnd message')
                        del sessions[session_id]

        if message.__id__ in interesting_events:
            if n:
                n.sessionid = session_id
                n.received_at = int(datetime.now().timestamp() * 1000)
                n.batch_order_number = len(batch)
                batch.append(n)
            else:
                continue


def attempt_session_insert(sess_batch, db, sessions_table_name, try_=0):
    if sess_batch:
        try:
            print("inserting sessions...")
            insert_batch(db, sess_batch, table=sessions_table_name, level='sessions')
            print("inserted sessions succesfully")
        except TypeError as e:
            print("Type conversion error")
            print(repr(e))
        except ValueError as e:
            print("Message value could not be processed or inserted correctly")
            print(repr(e))
        except InterfaceError as e:
            if try_ < 3:
                try_ += 1
                sleep(try_*2)
                attempt_session_insert(sess_batch, db, sessions_table_name, try_)
        except Exception as e:
            print(repr(e))


def attempt_session_update(sess_batch, db, sessions_table_name):
    if sess_batch:
        try:
            print('updating sessions')
            update_batch(db, sess_batch, table=sessions_table_name)
        except TypeError as e:
            print('Type conversion error')
            print(repr(e))
        except ValueError as e:
            print('Message value could not be processed or inserted correctly')
            print(repr(e))
        except InterfaceError as e:
            print('Error while trying to update session into datawarehouse')
            print(repr(e))
        except Exception as e:
            print(repr(e))


def attempt_batch_insert(batch, db, table_name, EVENT_TYPE, try_=0):
    # insert a batch
    try:
        print("inserting...")
        insert_batch(db=db, batch=batch, table=table_name, level=EVENT_TYPE)
        print("inserted succesfully")
    except TypeError as e:
        print("Type conversion error")
        print(repr(e))
    except ValueError as e:
        print("Message value could not be processed or inserted correctly")
        print(repr(e))
    except InterfaceError as e:
        if try_ < 3:
            try_ += 1
            sleep(try_*2)
            attempt_batch_insert(batch, db, table_name, EVENT_TYPE, try_)
        else:
            # TODO: Restart redshift
            print(repr(e))
    except Exception as e:
        print(repr(e))

def decode_key(b) -> int:
    """
    Decode the message key (encoded with little endian)
    """
    try:
        decoded = int.from_bytes(b, "little", signed=False)
    except Exception as e:
        raise UnicodeDecodeError(f"Error while decoding message key (SessionID) from {b}\n{e}")
    return decoded


async def main():
    await pg_client.init()
    DATABASE = config('CLOUD_SERVICE')
    EVENT_TYPE = config('EVENT_TYPE')

    db = DBConnection(DATABASE)
    upload_rate = config('upload_rate', default=30, cast=int)

    if EVENT_TYPE == 'detailed':
        table_name = events_detailed_table_name
    elif EVENT_TYPE == 'normal':
        table_name = events_table_name

    batch = []
    sessions = defaultdict(lambda: None)
    sessions_batch = []

    sessions_events_selection = [1,25,28,29,30,31,32,54,56,62,69,78,125,126]
    if EVENT_TYPE == 'normal':
        selected_events = [21,22,25,27,64,78,125]
    elif EVENT_TYPE == 'detailed':
        selected_events = [1,4,21,22,25,27,31,32,39,48,59,64,69,78,125,126]
    filter_events = list(set(sessions_events_selection+selected_events))

    allowed_projects = config('PROJECT_IDS', default=None, cast=Csv(int))
    project_filter = PF(allowed_projects)
    codec = MessageCodec(filter_events)
    ssl_protocol = config('KAFKA_USE_SSL', default=True, cast=bool)
    consumer_settings = {
        "bootstrap.servers": config('KAFKA_SERVERS'),
        "group.id": f"connector_{DATABASE}",
        "auto.offset.reset": "earliest",
        "enable.auto.commit": False
    }
    if ssl_protocol:
        consumer_settings['security.protocol'] = 'SSL'
    consumer = Consumer(consumer_settings)

    consumer.subscribe(config("TOPICS", default="saas-raw").split(','))
    print("[INFO] Kafka consumer subscribed")

    c_time = time()
    read_msgs = 0
    while True:
        msg = consumer.poll(1.0)
        process_message(msg, codec, sessions, batch, sessions_batch, sessions_events_selection, selected_events, EVENT_TYPE, project_filter)
        read_msgs += 1
        if time() - c_time > upload_rate:
            print(f'[INFO] {read_msgs} kafka messages read in {upload_rate} seconds')
            await insertBatch(deepcopy(sessions_batch), deepcopy(batch), db, sessions_table_name, table_name, EVENT_TYPE)
            consumer.commit()
            sessions_batch = []
            batch = []
            read_msgs = 0
            c_time = time()


async def insertBatch(sessions_batch, batch, db, sessions_table_name, table_name, EVENT_TYPE):
    t1 = time()
    print(f'[BG-INFO] Number of events to add {len(batch)}, number of sessions to add {len(sessions_batch)}')
    new_sessions = list()
    updated_sessions = list()
    for old_status, session_in_batch in sessions_batch:
        if old_status == 'UPDATE':
            updated_sessions.append(session_in_batch)
        else:
            new_sessions.append(session_in_batch)
    print(f'[DEBUG] Number of new sessions {len(new_sessions)}, number of sessions to update {len(updated_sessions)}')
    if new_sessions != []:
        attempt_session_insert(new_sessions, db, sessions_table_name)

    if updated_sessions != []:
        attempt_session_update(updated_sessions, db, sessions_table_name)

    # insert a batch of events
    if batch != []:
        attempt_batch_insert(batch, db, table_name, EVENT_TYPE)
    print(f'[BG-INFO] Uploaded into S3 in {time()-t1} seconds')


if __name__ == '__main__':
    asyncio.run(main())
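attempt_session_insert() and attempt_batch_insert() above retry up to three times with a linearly growing sleep when psycopg2 raises InterfaceError (a dropped connection). The same idea as an iterative, standalone sketch, with a hypothetical do_insert callable:

    # Sketch: linear-backoff retry on dropped connections, mirroring the recursive
    # try_ / sleep(try_*2) logic above. `do_insert` is a hypothetical callable.
    from time import sleep
    from psycopg2 import InterfaceError

    def with_retry(do_insert, max_tries=3):
        for attempt in range(1, max_tries + 1):
            try:
                return do_insert()
            except InterfaceError:
                if attempt == max_tries:
                    raise              # out of retries; the caller can restart the connection
                sleep(attempt * 2)     # 2s, then 4s, like sleep(try_*2) in the diff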
@@ -3,10 +3,10 @@ from sqlalchemy import MetaData
from sqlalchemy.orm import sessionmaker, session
from contextlib import contextmanager
import logging
import os
from decouple import config as _config
from pathlib import Path

DATABASE = os.environ['DATABASE_NAME']
DATABASE = _config('CLOUD_SERVICE')
if DATABASE == 'redshift':
    import pandas_redshift as pr
@@ -34,7 +34,7 @@ class DBConnection:
    """
    Initializes connection to a database
    To update models file use:
    sqlacodegen --outfile models_universal.py mysql+pymysql://{user}:{pwd}@{address}
    sqlacodegen --outfile models_universal.py mysql+pymysql://{USER}:{pwd}@{HOST}
    """
    _sessions = sessionmaker()
@@ -44,53 +44,67 @@ class DBConnection:

        if config == 'redshift':
            self.pdredshift = pr
            self.pdredshift.connect_to_redshift(dbname=os.environ['schema'],
                                                host=os.environ['address'],
                                                port=os.environ['port'],
                                                user=os.environ['user'],
                                                password=os.environ['password'])
            ci = _config('cluster_info', default='')
            cluster_info = dict()
            if ci == '':
                cluster_info['USER'] = _config('USER')
                cluster_info['HOST'] = _config('HOST')
                cluster_info['PORT'] = _config('PORT')
                cluster_info['PASSWORD'] = _config('PASSWORD')
                cluster_info['DBNAME'] = _config('DBNAME')
            else:
                ci = ci.split(' ')
                cluster_info = dict()
                for _d in ci:
                    k,v = _d.split('=')
                    cluster_info[k]=v
            self.pdredshift.connect_to_redshift(dbname=cluster_info['DBNAME'],
                                                host=cluster_info['HOST'],
                                                port=cluster_info['PORT'],
                                                user=cluster_info['USER'],
                                                password=cluster_info['PASSWORD'])

            self.pdredshift.connect_to_s3(aws_access_key_id=os.environ['aws_access_key_id'],
                                          aws_secret_access_key=os.environ['aws_secret_access_key'],
                                          bucket=os.environ['bucket'],
                                          subdirectory=os.environ['subdirectory'])
            self.pdredshift.connect_to_s3(aws_access_key_id=_config('AWS_ACCESS_KEY_ID'),
                                          aws_secret_access_key=_config('AWS_SECRET_ACCESS_KEY'),
                                          bucket=_config('BUCKET'),
                                          subdirectory=_config('SUBDIRECTORY', default=None))

            self.connect_str = os.environ['connect_str'].format(
                user=os.environ['user'],
                password=os.environ['password'],
                address=os.environ['address'],
                port=os.environ['port'],
                schema=os.environ['schema']
            self.CONNECTION_STRING = _config('CONNECTION_STRING').format(
                USER=cluster_info['USER'],
                PASSWORD=cluster_info['PASSWORD'],
                HOST=cluster_info['HOST'],
                PORT=cluster_info['PORT'],
                DBNAME=cluster_info['DBNAME']
            )
            self.engine = create_engine(self.connect_str)
            self.engine = create_engine(self.CONNECTION_STRING)

        elif config == 'clickhouse':
            self.connect_str = os.environ['connect_str'].format(
                address=os.environ['address'],
                database=os.environ['database']
            self.CONNECTION_STRING = _config('CONNECTION_STRING').format(
                HOST=_config('HOST'),
                DATABASE=_config('DATABASE')
            )
            self.engine = create_engine(self.connect_str)
            self.engine = create_engine(self.CONNECTION_STRING)
        elif config == 'pg':
            self.connect_str = os.environ['connect_str'].format(
                user=os.environ['user'],
                password=os.environ['password'],
                address=os.environ['address'],
                port=os.environ['port'],
                database=os.environ['database']
            self.CONNECTION_STRING = _config('CONNECTION_STRING').format(
                USER=_config('USER'),
                PASSWORD=_config('PASSWORD'),
                HOST=_config('HOST'),
                PORT=_config('PORT'),
                DATABASE=_config('DATABASE')
            )
            self.engine = create_engine(self.connect_str)
            self.engine = create_engine(self.CONNECTION_STRING)
        elif config == 'bigquery':
            pass
        elif config == 'snowflake':
            self.connect_str = os.environ['connect_str'].format(
                user=os.environ['user'],
                password=os.environ['password'],
                account=os.environ['account'],
                database=os.environ['database'],
                schema = os.environ['schema'],
                warehouse = os.environ['warehouse']
            self.CONNECTION_STRING = _config('CONNECTION_STRING').format(
                USER=_config('USER'),
                PASSWORD=_config('PASSWORD'),
                ACCOUNT=_config('ACCOUNT'),
                DATABASE=_config('DATABASE'),
                DBNAME = _config('DBNAME'),
                WAREHOUSE = _config('WAREHOUSE')
            )
            self.engine = create_engine(self.connect_str)
            self.engine = create_engine(self.CONNECTION_STRING)
        else:
            raise ValueError("This db configuration doesn't exist. Add into keys file.")
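The redshift branch above can read all connection settings from a single space-separated cluster_info value instead of five separate variables. A minimal sketch of that parsing, with a made-up example value (the keys match the diff):

    # Sketch: parse the space-separated `cluster_info` format consumed by DBConnection above.
    # The example value is hypothetical.
    ci = "USER=admin HOST=my-cluster.example.com PORT=5439 PASSWORD=secret DBNAME=dev"

    cluster_info = {}
    for pair in ci.split(' '):
        k, v = pair.split('=')    # assumes values contain no spaces or '=' signs
        cluster_info[k] = v

    print(cluster_info['HOST'])   # -> my-cluster.example.com; values remain strings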
@@ -128,3 +142,10 @@ class DBConnection:
        my_session.close()
        connection.close()

    def restart(self):
        self.close()
        self.__init__(config=self.config)

    def close(self):
        if self.config == 'redshift':
            self.pdredshift.close_up_shop()
@@ -2,9 +2,9 @@
from sqlalchemy import BigInteger, Boolean, Column, Integer, ARRAY, VARCHAR, text, VARCHAR
from sqlalchemy.ext.declarative import declarative_base
from pathlib import Path
import os
from decouple import config

DATABASE = os.environ['DATABASE_NAME']
DATABASE = config('CLOUD_SERVICE')

Base = declarative_base()
metadata = Base.metadata
@@ -13,16 +13,16 @@ base_path = Path(__file__).parent.parent

# Get a table name from a configuration file
try:
    events_table_name = os.environ['events_table_name']
    events_table_name = config('EVENTS_TABLE_NAME', default='connector_events')
except KeyError as e:
    events_table_name = None
    print(repr(e))
try:
    events_detailed_table_name = os.environ['events_detailed_table_name']
    events_detailed_table_name = config('EVENTS_DETAILED_TABLE_NAME', default='connector_events_detailed')
except KeyError as e:
    print(repr(e))
    events_detailed_table_name = None
sessions_table_name = os.environ['sessions_table']
sessions_table_name = config('SESSIONS_TABLE', default='connector_user_sessions')


class Session(Base):
@@ -69,65 +69,42 @@ class Session(Base):

    # ISSUES AND EVENTS
    js_exceptions_count = Column(BigInteger)
    long_tasks_total_duration = Column(BigInteger)
    long_tasks_max_duration = Column(BigInteger)
    long_tasks_count = Column(BigInteger)
    #long_tasks_total_duration = Column(BigInteger)
    #long_tasks_max_duration = Column(BigInteger)
    #long_tasks_count = Column(BigInteger)
    inputs_count = Column(BigInteger)
    clicks_count = Column(BigInteger)
    issues_count = Column(BigInteger)
    issues = ARRAY(VARCHAR(5000))
    urls_count = Column(BigInteger)
    urls = ARRAY(VARCHAR(5000))


class Event(Base):
    __tablename__ = events_table_name

    sessionid = Column(BigInteger, primary_key=True)
    connectioninformation_downlink = Column(BigInteger)
    connectioninformation_type = Column(VARCHAR(5000))
    consolelog_level = Column(VARCHAR(5000))
    consolelog_value = Column(VARCHAR(5000))
    customevent_messageid = Column(BigInteger)
    customevent_name = Column(VARCHAR(5000))
    customevent_payload = Column(VARCHAR(5000))
    customevent_timestamp = Column(BigInteger)
    errorevent_message = Column(VARCHAR(5000))
    errorevent_messageid = Column(BigInteger)
    errorevent_name = Column(VARCHAR(5000))
    errorevent_payload = Column(VARCHAR(5000))
    errorevent_source = Column(VARCHAR(5000))
    errorevent_timestamp = Column(BigInteger)
    jsexception_message = Column(VARCHAR(5000))
    jsexception_name = Column(VARCHAR(5000))
    jsexception_payload = Column(VARCHAR(5000))
    metadata_key = Column(VARCHAR(5000))
    metadata_value = Column(VARCHAR(5000))
    mouseclick_id = Column(BigInteger)
    mouseclick_hesitationtime = Column(BigInteger)
    mouseclick_label = Column(VARCHAR(5000))
    pageevent_firstcontentfulpaint = Column(BigInteger)
    pageevent_firstpaint = Column(BigInteger)
    pageevent_messageid = Column(BigInteger)
    pageevent_referrer = Column(VARCHAR(5000))
    pageevent_speedindex = Column(BigInteger)
    pageevent_timestamp = Column(BigInteger)
    pageevent_url = Column(VARCHAR(5000))
    pagerendertiming_timetointeractive = Column(BigInteger)
    pagerendertiming_visuallycomplete = Column(BigInteger)
    rawcustomevent_name = Column(VARCHAR(5000))
    rawcustomevent_payload = Column(VARCHAR(5000))
    setviewportsize_height = Column(BigInteger)
    setviewportsize_width = Column(BigInteger)
    timestamp_timestamp = Column(BigInteger)
    user_anonymous_id = Column(VARCHAR(5000))
    user_id = Column(VARCHAR(5000))
    issueevent_messageid = Column(BigInteger)
    jsexception_metadata = Column(VARCHAR(5000))
    networkrequest_type = Column(VARCHAR(5000))
    networkrequest_method = Column(VARCHAR(5000))
    networkrequest_url = Column(VARCHAR(5000))
    networkrequest_request = Column(VARCHAR(5000))
    networkrequest_response = Column(VARCHAR(5000))
    networkrequest_status = Column(BigInteger)
    networkrequest_timestamp = Column(BigInteger)
    networkrequest_duration = Column(BigInteger)
    issueevent_message_id = Column(BigInteger)
    issueevent_timestamp = Column(BigInteger)
    issueevent_type = Column(VARCHAR(5000))
    issueevent_contextstring = Column(VARCHAR(5000))
    issueevent_context_string = Column(VARCHAR(5000))
    issueevent_context = Column(VARCHAR(5000))
    issueevent_payload = Column(VARCHAR(5000))
    issueevent_url = Column(VARCHAR(5000))
    customissue_name = Column(VARCHAR(5000))
    customissue_payload = Column(VARCHAR(5000))
    received_at = Column(BigInteger)
@@ -137,7 +114,6 @@ class Event(Base):
class DetailedEvent(Base):
    __tablename__ = events_detailed_table_name

    # id = Column(Integer, primary_key=True, server_default=text("\"identity\"(119029, 0, '0,1'::text)"))
    sessionid = Column(BigInteger, primary_key=True)
    clickevent_hesitationtime = Column(BigInteger)
    clickevent_label = Column(VARCHAR(5000))
@@ -147,28 +123,8 @@ class DetailedEvent(Base):
    connectioninformation_type = Column(VARCHAR(5000))
    consolelog_level = Column(VARCHAR(5000))
    consolelog_value = Column(VARCHAR(5000))
    cpuissue_duration = Column(BigInteger)
    cpuissue_rate = Column(BigInteger)
    cpuissue_timestamp = Column(BigInteger)
    createdocument = Column(Boolean)
    createelementnode_id = Column(BigInteger)
    createelementnode_parentid = Column(BigInteger)
    cssdeleterule_index = Column(BigInteger)
    cssdeleterule_stylesheetid = Column(BigInteger)
    cssinsertrule_index = Column(BigInteger)
    cssinsertrule_rule = Column(VARCHAR(5000))
    cssinsertrule_stylesheetid = Column(BigInteger)
    customevent_messageid = Column(BigInteger)
    customevent_name = Column(VARCHAR(5000))
    customevent_payload = Column(VARCHAR(5000))
    customevent_timestamp = Column(BigInteger)
    domdrop_timestamp = Column(BigInteger)
    errorevent_message = Column(VARCHAR(5000))
    errorevent_messageid = Column(BigInteger)
    errorevent_name = Column(VARCHAR(5000))
    errorevent_payload = Column(VARCHAR(5000))
    errorevent_source = Column(VARCHAR(5000))
    errorevent_timestamp = Column(BigInteger)
    fetch_duration = Column(BigInteger)
    fetch_method = Column(VARCHAR(5000))
    fetch_request = Column(VARCHAR(5000))
@@ -180,9 +136,6 @@ class DetailedEvent(Base):
    graphql_operationname = Column(VARCHAR(5000))
    graphql_response = Column(VARCHAR(5000))
    graphql_variables = Column(VARCHAR(5000))
    graphqlevent_messageid = Column(BigInteger)
    graphqlevent_name = Column(VARCHAR(5000))
    graphqlevent_timestamp = Column(BigInteger)
    inputevent_label = Column(VARCHAR(5000))
    inputevent_messageid = Column(BigInteger)
    inputevent_timestamp = Column(BigInteger)
@@ -191,26 +144,18 @@ class DetailedEvent(Base):
    jsexception_message = Column(VARCHAR(5000))
    jsexception_name = Column(VARCHAR(5000))
    jsexception_payload = Column(VARCHAR(5000))
    memoryissue_duration = Column(BigInteger)
    memoryissue_rate = Column(BigInteger)
    memoryissue_timestamp = Column(BigInteger)
    metadata_key = Column(VARCHAR(5000))
    metadata_value = Column(VARCHAR(5000))
    mobx_payload = Column(VARCHAR(5000))
    mobx_type = Column(VARCHAR(5000))
    jsexception_metadata = Column(VARCHAR(5000))
    mouseclick_id = Column(BigInteger)
    mouseclick_hesitationtime = Column(BigInteger)
    mouseclick_label = Column(VARCHAR(5000))
    mousemove_x = Column(BigInteger)
    mousemove_y = Column(BigInteger)
    movenode_id = Column(BigInteger)
    movenode_index = Column(BigInteger)
    movenode_parentid = Column(BigInteger)
    ngrx_action = Column(VARCHAR(5000))
    ngrx_duration = Column(BigInteger)
    ngrx_state = Column(VARCHAR(5000))
    otable_key = Column(VARCHAR(5000))
    otable_value = Column(VARCHAR(5000))
    networkrequest_type = Column(VARCHAR(5000))
    networkrequest_method = Column(VARCHAR(5000))
    networkrequest_url = Column(VARCHAR(5000))
    networkrequest_request = Column(VARCHAR(5000))
    networkrequest_response = Column(VARCHAR(5000))
    networkrequest_status = Column(BigInteger)
    networkrequest_timestamp = Column(BigInteger)
    networkrequest_duration = Column(BigInteger)
    pageevent_domcontentloadedeventend = Column(BigInteger)
    pageevent_domcontentloadedeventstart = Column(BigInteger)
    pageevent_firstcontentfulpaint = Column(BigInteger)
@@ -226,77 +171,8 @@ class DetailedEvent(Base):
    pageevent_speedindex = Column(BigInteger)
    pageevent_timestamp = Column(BigInteger)
    pageevent_url = Column(VARCHAR(5000))
    pageloadtiming_domcontentloadedeventend = Column(BigInteger)
    pageloadtiming_domcontentloadedeventstart = Column(BigInteger)
    pageloadtiming_firstcontentfulpaint = Column(BigInteger)
    pageloadtiming_firstpaint = Column(BigInteger)
    pageloadtiming_loadeventend = Column(BigInteger)
    pageloadtiming_loadeventstart = Column(BigInteger)
    pageloadtiming_requeststart = Column(BigInteger)
    pageloadtiming_responseend = Column(BigInteger)
    pageloadtiming_responsestart = Column(BigInteger)
    pagerendertiming_speedindex = Column(BigInteger)
    pagerendertiming_timetointeractive = Column(BigInteger)
    pagerendertiming_visuallycomplete = Column(BigInteger)
    performancetrack_frames = Column(BigInteger)
    performancetrack_ticks = Column(BigInteger)
    performancetrack_totaljsheapsize = Column(BigInteger)
    performancetrack_usedjsheapsize = Column(BigInteger)
    performancetrackaggr_avgcpu = Column(BigInteger)
    performancetrackaggr_avgfps = Column(BigInteger)
    performancetrackaggr_avgtotaljsheapsize = Column(BigInteger)
    performancetrackaggr_avgusedjsheapsize = Column(BigInteger)
    performancetrackaggr_maxcpu = Column(BigInteger)
    performancetrackaggr_maxfps = Column(BigInteger)
    performancetrackaggr_maxtotaljsheapsize = Column(BigInteger)
    performancetrackaggr_maxusedjsheapsize = Column(BigInteger)
    performancetrackaggr_mincpu = Column(BigInteger)
    performancetrackaggr_minfps = Column(BigInteger)
    performancetrackaggr_mintotaljsheapsize = Column(BigInteger)
    performancetrackaggr_minusedjsheapsize = Column(BigInteger)
    performancetrackaggr_timestampend = Column(BigInteger)
    performancetrackaggr_timestampstart = Column(BigInteger)
    profiler_args = Column(VARCHAR(5000))
    profiler_duration = Column(BigInteger)
    profiler_name = Column(VARCHAR(5000))
    profiler_result = Column(VARCHAR(5000))
    rawcustomevent_name = Column(VARCHAR(5000))
    rawcustomevent_payload = Column(VARCHAR(5000))
    rawerrorevent_message = Column(VARCHAR(5000))
    rawerrorevent_name = Column(VARCHAR(5000))
    rawerrorevent_payload = Column(VARCHAR(5000))
    rawerrorevent_source = Column(VARCHAR(5000))
    rawerrorevent_timestamp = Column(BigInteger)
    redux_action = Column(VARCHAR(5000))
    redux_duration = Column(BigInteger)
    redux_state = Column(VARCHAR(5000))
    removenode_id = Column(BigInteger)
    removenodeattribute_id = Column(BigInteger)
    removenodeattribute_name = Column(VARCHAR(5000))
    resourceevent_decodedbodysize = Column(BigInteger)
    resourceevent_duration = Column(BigInteger)
    resourceevent_encodedbodysize = Column(BigInteger)
    resourceevent_headersize = Column(BigInteger)
    resourceevent_messageid = Column(BigInteger)
    resourceevent_method = Column(VARCHAR(5000))
    resourceevent_status = Column(BigInteger)
    resourceevent_success = Column(Boolean)
    resourceevent_timestamp = Column(BigInteger)
    resourceevent_ttfb = Column(BigInteger)
    resourceevent_type = Column(VARCHAR(5000))
    resourceevent_url = Column(VARCHAR(5000))
    resourcetiming_decodedbodysize = Column(BigInteger)
    resourcetiming_duration = Column(BigInteger)
    resourcetiming_encodedbodysize = Column(BigInteger)
    resourcetiming_headersize = Column(BigInteger)
    resourcetiming_initiator = Column(VARCHAR(5000))
    resourcetiming_timestamp = Column(BigInteger)
    resourcetiming_ttfb = Column(BigInteger)
    resourcetiming_url = Column(VARCHAR(5000))
    sessiondisconnect = Column(Boolean)
    sessiondisconnect_timestamp = Column(BigInteger)
    sessionend = Column(Boolean)
    sessionend_timestamp = Column(BigInteger)
    sessionend_encryption_key = Column(VARCHAR(5000))
    sessionstart_projectid = Column(BigInteger)
    sessionstart_revid = Column(VARCHAR(5000))
    sessionstart_timestamp = Column(BigInteger)
@@ -312,65 +188,18 @@ class DetailedEvent(Base):
    sessionstart_useros = Column(VARCHAR(5000))
    sessionstart_userosversion = Column(VARCHAR(5000))
    sessionstart_useruuid = Column(VARCHAR(5000))
    setcssdata_data = Column(BigInteger)
    setcssdata_id = Column(BigInteger)
    setinputchecked_checked = Column(BigInteger)
    setinputchecked_id = Column(BigInteger)
    setinputtarget_id = Column(BigInteger)
    setinputtarget_label = Column(BigInteger)
    setinputvalue_id = Column(BigInteger)
    setinputvalue_mask = Column(BigInteger)
    setinputvalue_value = Column(BigInteger)
    setnodeattribute_id = Column(BigInteger)
    setnodeattribute_name = Column(BigInteger)
    setnodeattribute_value = Column(BigInteger)
    setnodedata_data = Column(BigInteger)
    setnodedata_id = Column(BigInteger)
    setnodescroll_id = Column(BigInteger)
    setnodescroll_x = Column(BigInteger)
    setnodescroll_y = Column(BigInteger)
    setpagelocation_navigationstart = Column(BigInteger)
    setpagelocation_referrer = Column(VARCHAR(5000))
    setpagelocation_url = Column(VARCHAR(5000))
    setpagevisibility_hidden = Column(Boolean)
    setviewportscroll_x = Column(BigInteger)
    setviewportscroll_y = Column(BigInteger)
    setviewportsize_height = Column(BigInteger)
    setviewportsize_width = Column(BigInteger)
    stateaction_type = Column(VARCHAR(5000))
    stateactionevent_messageid = Column(BigInteger)
    stateactionevent_timestamp = Column(BigInteger)
    stateactionevent_type = Column(VARCHAR(5000))
    timestamp_timestamp = Column(BigInteger)
    useranonymousid_id = Column(VARCHAR(5000))
    userid_id = Column(VARCHAR(5000))
    vuex_mutation = Column(VARCHAR(5000))
    vuex_state = Column(VARCHAR(5000))
    longtasks_timestamp = Column(BigInteger)
    longtasks_duration = Column(BigInteger)
    longtasks_context = Column(BigInteger)
    longtasks_containertype = Column(BigInteger)
    longtasks_containersrc = Column(VARCHAR(5000))
    longtasks_containerid = Column(VARCHAR(5000))
    longtasks_containername = Column(VARCHAR(5000))
    setnodeurlbasedattribute_id = Column(BigInteger)
    setnodeurlbasedattribute_name = Column(VARCHAR(5000))
    setnodeurlbasedattribute_value = Column(VARCHAR(5000))
    setnodeurlbasedattribute_baseurl = Column(VARCHAR(5000))
    setstyledata_id = Column(BigInteger)
    setstyledata_data = Column(VARCHAR(5000))
    setstyledata_baseurl = Column(VARCHAR(5000))
    issueevent_messageid = Column(BigInteger)
    issueevent_message_id = Column(BigInteger)
    issueevent_timestamp = Column(BigInteger)
    issueevent_type = Column(VARCHAR(5000))
    issueevent_contextstring = Column(VARCHAR(5000))
    issueevent_context_string = Column(VARCHAR(5000))
    issueevent_context = Column(VARCHAR(5000))
    issueevent_payload = Column(VARCHAR(5000))
    technicalinfo_type = Column(VARCHAR(5000))
    technicalinfo_value = Column(VARCHAR(5000))
    issueevent_url = Column(VARCHAR(5000))
    customissue_name = Column(VARCHAR(5000))
    customissue_payload = Column(VARCHAR(5000))
    pageclose = Column(Boolean)
    received_at = Column(BigInteger)
    batch_order_number = Column(BigInteger)
@@ -1,86 +1,107 @@
from pathlib import Path
from decouple import config

base_path = Path(__file__).parent.parent

EVENT_TYPE = config('EVENT_TYPE', default='normal')

def create_tables_clickhouse(db):
    with open(base_path / 'sql' / 'clickhouse_events.sql') as f:
        q = f.read()
    db.engine.execute(q)
    print(f"`connector_user_events` table created succesfully.")
    if EVENT_TYPE == 'normal':
        with open(base_path / 'sql' / 'clickhouse_events.sql', 'r') as f:
            q = f.read()
        with db.get_live_session() as conn:
            conn.execute(q)
        print(f"`connector_user_events` table created succesfully.")

    with open(base_path / 'sql' / 'clickhouse_events_buffer.sql') as f:
        q = f.read()
    db.engine.execute(q)
    print(f"`connector_user_events_buffer` table created succesfully.")
        with open(base_path / 'sql' / 'clickhouse_events_buffer.sql', 'r') as f:
            q = f.read()
        with db.get_live_session() as conn:
            conn.execute(q)
        print(f"`connector_user_events_buffer` table created succesfully.")

    with open(base_path / 'sql' / 'clickhouse_sessions.sql') as f:
    with open(base_path / 'sql' / 'clickhouse_sessions.sql', 'r') as f:
        q = f.read()
    db.engine.execute(q)
    with db.get_live_session() as conn:
        conn.execute(q)
    print(f"`connector_sessions` table created succesfully.")

    with open(base_path / 'sql' / 'clickhouse_sessions_buffer.sql') as f:
    with open(base_path / 'sql' / 'clickhouse_sessions_buffer.sql', 'r') as f:
        q = f.read()
    db.engine.execute(q)
    with db.get_live_session() as conn:
        conn.execute(q)
    print(f"`connector_sessions_buffer` table created succesfully.")

    #with open(base_path / 'sql' / 'clickhouse_events_detailed.sql') as f:
    #    q = f.read()
    #db.engine.execute(q)
    #print(f"`connector_user_events_detailed` table created succesfully.")

    #with open(base_path / 'sql' / 'clickhouse_events_detailed_buffer.sql') as f:
    #    q = f.read()
    #db.engine.execute(q)
    #print(f"`connector_user_events_detailed_buffer` table created succesfully.")
    if EVENT_TYPE == 'detailed':
        with open(base_path / 'sql' / 'clickhouse_events_detailed.sql') as f:
            q = f.read()
        with db.get_live_session() as conn: conn.execute(q)
        print(f"`connector_user_events_detailed` table created succesfully.")

        with open(base_path / 'sql' / 'clickhouse_events_detailed_buffer.sql') as f:
            q = f.read()
        with db.get_live_session() as conn: conn.execute(q)
        print(f"`connector_user_events_detailed_buffer` table created succesfully.")


def create_tables_postgres(db):
    with open(base_path / 'sql' / 'postgres_events.sql') as f:
        q = f.read()
    db.engine.execute(q)
    print(f"`connector_user_events` table created succesfully.")
    if EVENT_TYPE == 'normal':
        with open(base_path / 'sql' / 'postgres_events.sql', 'r') as f:
            q = f.read()
        with db.get_live_session() as conn:
            conn.execute(q)
        print(f"`connector_user_events` table created succesfully.")

    with open(base_path / 'sql' / 'postgres_sessions.sql') as f:
    with open(base_path / 'sql' / 'postgres_sessions.sql', 'r') as f:
        q = f.read()
    db.engine.execute(q)
    with db.get_live_session() as conn:
        conn.execute(q)
    print(f"`connector_sessions` table created succesfully.")

    #with open(base_path / 'sql' / 'postgres_events_detailed.sql') as f:
    #    q = f.read()
    #db.engine.execute(q)
    #print(f"`connector_user_events_detailed` table created succesfully.")
    if EVENT_TYPE == 'detailed':
        with open(base_path / 'sql' / 'postgres_events_detailed.sql') as f:
            q = f.read()
        with db.get_live_session() as conn: conn.execute(q)
        print(f"`connector_user_events_detailed` table created succesfully.")


def create_tables_snowflake(db):
    with open(base_path / 'sql' / 'snowflake_events.sql') as f:
        q = f.read()
    db.engine.execute(q)
    print(f"`connector_user_events` table created succesfully.")

    with open(base_path / 'sql' / 'snowflake_sessions.sql') as f:
    if EVENT_TYPE == 'normal':
        with open(base_path / 'sql' / 'snowflake_events.sql', 'r') as f:
            q = f.read()
        with db.get_live_session() as conn:
            conn.execute(q)
        print(f"`connector_user_events` table created succesfully.")

    with open(base_path / 'sql' / 'snowflake_sessions.sql', 'r') as f:
        q = f.read()
    db.engine.execute(q)
    with db.get_live_session() as conn:
        conn.execute(q)
    print(f"`connector_sessions` table created succesfully.")

    #with open(base_path / 'sql' / 'snowflake_events_detailed.sql') as f:
    #    q = f.read()
    #db.engine.execute(q)
    #print(f"`connector_user_events_detailed` table created succesfully.")
    if EVENT_TYPE == 'detailed':
        with open(base_path / 'sql' / 'snowflake_events_detailed.sql') as f:
            q = f.read()
        with db.get_live_session() as conn: conn.execute(q)
        print(f"`connector_user_events_detailed` table created succesfully.")


def create_tables_redshift(db):
    with open(base_path / 'sql' / 'redshift_events.sql') as f:
        q = f.read()
    db.engine.execute(q)
    print(f"`connector_user_events` table created succesfully.")
    if EVENT_TYPE == 'normal':
        with open(base_path / 'sql' / 'redshift_events.sql', 'r') as f:
            q = f.read()
        with db.get_live_session() as conn:
            conn.execute(q)
        print(f"`connector_user_events` table created succesfully.")

    with open(base_path / 'sql' / 'redshift_sessions.sql') as f:
    with open(base_path / 'sql' / 'redshift_sessions.sql', 'r') as f:
        q = f.read()
    db.engine.execute(q)
    with db.get_live_session() as conn:
        conn.execute(q)
    print(f"`connector_sessions` table created succesfully.")

    #with open(base_path / 'sql' / 'redshift_events_detailed.sql') as f:
    #    q = f.read()
    #db.engine.execute(q)
    #print(f"`connector_user_events_detailed` table created succesfully.")
    if EVENT_TYPE == 'detailed':
        with open(base_path / 'sql' / 'redshift_events_detailed.sql') as f:
            q = f.read()
        with db.get_live_session() as conn: conn.execute(q)
        print(f"`connector_user_events_detailed` table created succesfully.")
@@ -1,334 +1,167 @@
import pandas as pd
from db.models import DetailedEvent, Event, Session, DATABASE

dtypes_events = {'sessionid': "Int64",
    'connectioninformation_downlink': "Int64",
    'connectioninformation_type': "string",
    'consolelog_level': "string",
    'consolelog_value': "string",
    'customevent_messageid': "Int64",
    'customevent_name': "string",
    'customevent_payload': "string",
    'customevent_timestamp': "Int64",
    'errorevent_message': "string",
    'errorevent_messageid': "Int64",
    'errorevent_name': "string",
    'errorevent_payload': "string",
    'errorevent_source': "string",
    'errorevent_timestamp': "Int64",
    'jsexception_message': "string",
    'jsexception_name': "string",
    'jsexception_payload': "string",
    'metadata_key': "string",
    'metadata_value': "string",
    'mouseclick_id': "Int64",
    'mouseclick_hesitationtime': "Int64",
    'mouseclick_label': "string",
    'pageevent_firstcontentfulpaint': "Int64",
    'pageevent_firstpaint': "Int64",
    'pageevent_messageid': "Int64",
    'pageevent_referrer': "string",
    'pageevent_speedindex': "Int64",
    'pageevent_timestamp': "Int64",
    'pageevent_url': "string",
    'pagerendertiming_timetointeractive': "Int64",
    'pagerendertiming_visuallycomplete': "Int64",
    'rawcustomevent_name': "string",
    'rawcustomevent_payload': "string",
    'setviewportsize_height': "Int64",
    'setviewportsize_width': "Int64",
    'timestamp_timestamp': "Int64",
    'user_anonymous_id': "string",
    'user_id': "string",
    'issueevent_messageid': "Int64",
    'issueevent_timestamp': "Int64",
    'issueevent_type': "string",
    'issueevent_contextstring': "string",
    'issueevent_context': "string",
    'issueevent_payload': "string",
    'customissue_name': "string",
    'customissue_payload': "string",
    'received_at': "Int64",
    'batch_order_number': "Int64"}
dtypes_events = {
    'sessionid': "Int64",
    'consolelog_level': "string",
    'consolelog_value': "string",
    'customevent_name': "string",
    'customevent_payload': "string",
    'jsexception_message': "string",
    'jsexception_name': "string",
    'jsexception_payload': "string",
    'jsexception_metadata': "string",
    'networkrequest_type': "string",
    'networkrequest_method': "string",
    'networkrequest_url': "string",
    'networkrequest_request': "string",
    'networkrequest_response': "string",
    'networkrequest_status': "Int64",
    'networkrequest_timestamp': "Int64",
    'networkrequest_duration': "Int64",
    'issueevent_message_id': "Int64",
    'issueevent_timestamp': "Int64",
    'issueevent_type': "string",
    'issueevent_context_string': "string",
    'issueevent_context': "string",
    'issueevent_url': "string",
    'issueevent_payload': "string",
    'customissue_name': "string",
    'customissue_payload': "string",
    'received_at': "Int64",
    'batch_order_number': "Int64"}
dtypes_detailed_events = {
    "sessionid": "Int64",
    "clickevent_hesitationtime": "Int64",
    "clickevent_label": "object",
    "clickevent_messageid": "Int64",
    "clickevent_timestamp": "Int64",
    "connectioninformation_downlink": "Int64",
    "connectioninformation_type": "object",
    "consolelog_level": "object",
    "consolelog_value": "object",
    "cpuissue_duration": "Int64",
    "cpuissue_rate": "Int64",
    "cpuissue_timestamp": "Int64",
    "createdocument": "boolean",
    "createelementnode_id": "Int64",
    "createelementnode_parentid": "Int64",
    "cssdeleterule_index": "Int64",
    "cssdeleterule_stylesheetid": "Int64",
    "cssinsertrule_index": "Int64",
    "cssinsertrule_rule": "object",
    "cssinsertrule_stylesheetid": "Int64",
    "customevent_messageid": "Int64",
    "customevent_name": "object",
    "customevent_payload": "object",
    "customevent_timestamp": "Int64",
    "domdrop_timestamp": "Int64",
    "errorevent_message": "object",
    "errorevent_messageid": "Int64",
    "errorevent_name": "object",
    "errorevent_payload": "object",
    "errorevent_source": "object",
    "errorevent_timestamp": "Int64",
    "fetch_duration": "Int64",
    "fetch_method": "object",
    "fetch_request": "object",
    "fetch_response": "object",
    "fetch_status": "Int64",
    "fetch_timestamp": "Int64",
    "fetch_url": "object",
    "graphql_operationkind": "object",
    "graphql_operationname": "object",
    "graphql_response": "object",
    "graphql_variables": "object",
    "graphqlevent_messageid": "Int64",
    "graphqlevent_name": "object",
    "graphqlevent_timestamp": "Int64",
    "inputevent_label": "object",
    "inputevent_messageid": "Int64",
    "inputevent_timestamp": "Int64",
    "inputevent_value": "object",
    "inputevent_valuemasked": "boolean",
    "jsexception_message": "object",
    "jsexception_name": "object",
    "jsexception_payload": "object",
    "longtasks_timestamp": "Int64",
    "longtasks_duration": "Int64",
    "longtasks_containerid": "object",
    "longtasks_containersrc": "object",
    "memoryissue_duration": "Int64",
    "memoryissue_rate": "Int64",
    "memoryissue_timestamp": "Int64",
    "metadata_key": "object",
    "metadata_value": "object",
    "mobx_payload": "object",
    "mobx_type": "object",
    "mouseclick_id": "Int64",
    "mouseclick_hesitationtime": "Int64",
    "mouseclick_label": "object",
    "mousemove_x": "Int64",
    "mousemove_y": "Int64",
    "movenode_id": "Int64",
    "movenode_index": "Int64",
    "movenode_parentid": "Int64",
    "ngrx_action": "object",
    "ngrx_duration": "Int64",
    "ngrx_state": "object",
    "otable_key": "object",
    "otable_value": "object",
    "pageevent_domcontentloadedeventend": "Int64",
    "pageevent_domcontentloadedeventstart": "Int64",
    "pageevent_firstcontentfulpaint": "Int64",
    "pageevent_firstpaint": "Int64",
    "pageevent_loaded": "boolean",
    "pageevent_loadeventend": "Int64",
    "pageevent_loadeventstart": "Int64",
    "pageevent_messageid": "Int64",
    "pageevent_referrer": "object",
    "pageevent_requeststart": "Int64",
    "pageevent_responseend": "Int64",
    "pageevent_responsestart": "Int64",
    "pageevent_speedindex": "Int64",
    "pageevent_timestamp": "Int64",
    "pageevent_url": "object",
    "pageloadtiming_domcontentloadedeventend": "Int64",
    "pageloadtiming_domcontentloadedeventstart": "Int64",
    "pageloadtiming_firstcontentfulpaint": "Int64",
    "pageloadtiming_firstpaint": "Int64",
    "pageloadtiming_loadeventend": "Int64",
    "pageloadtiming_loadeventstart": "Int64",
    "pageloadtiming_requeststart": "Int64",
    "pageloadtiming_responseend": "Int64",
    "pageloadtiming_responsestart": "Int64",
    "pagerendertiming_speedindex": "Int64",
    "pagerendertiming_timetointeractive": "Int64",
    "pagerendertiming_visuallycomplete": "Int64",
    "performancetrack_frames": "Int64",
    "performancetrack_ticks": "Int64",
    "performancetrack_totaljsheapsize": "Int64",
    "performancetrack_usedjsheapsize": "Int64",
    "performancetrackaggr_avgcpu": "Int64",
    "performancetrackaggr_avgfps": "Int64",
    "performancetrackaggr_avgtotaljsheapsize": "Int64",
    "performancetrackaggr_avgusedjsheapsize": "Int64",
    "performancetrackaggr_maxcpu": "Int64",
    "performancetrackaggr_maxfps": "Int64",
    "performancetrackaggr_maxtotaljsheapsize": "Int64",
    "performancetrackaggr_maxusedjsheapsize": "Int64",
    "performancetrackaggr_mincpu": "Int64",
    "performancetrackaggr_minfps": "Int64",
    "performancetrackaggr_mintotaljsheapsize": "Int64",
    "performancetrackaggr_minusedjsheapsize": "Int64",
    "performancetrackaggr_timestampend": "Int64",
    "performancetrackaggr_timestampstart": "Int64",
    "profiler_args": "object",
    "profiler_duration": "Int64",
    "profiler_name": "object",
    "profiler_result": "object",
    "rawcustomevent_name": "object",
    "rawcustomevent_payload": "object",
    "rawerrorevent_message": "object",
    "rawerrorevent_name": "object",
    "rawerrorevent_payload": "object",
    "rawerrorevent_source": "object",
    "rawerrorevent_timestamp": "Int64",
    "redux_action": "object",
    "redux_duration": "Int64",
    "redux_state": "object",
    "removenode_id": "Int64",
    "removenodeattribute_id": "Int64",
    "removenodeattribute_name": "object",
    "resourceevent_decodedbodysize": "Int64",
    "resourceevent_duration": "Int64",
    "resourceevent_encodedbodysize": "Int64",
    "resourceevent_headersize": "Int64",
    "resourceevent_messageid": "Int64",
    "resourceevent_method": "object",
    "resourceevent_status": "Int64",
    "resourceevent_success": "boolean",
    "resourceevent_timestamp": "Int64",
    "resourceevent_ttfb": "Int64",
    "resourceevent_type": "object",
    "resourceevent_url": "object",
    "resourcetiming_decodedbodysize": "Int64",
    "resourcetiming_duration": "Int64",
    "resourcetiming_encodedbodysize": "Int64",
    "resourcetiming_headersize": "Int64",
    "resourcetiming_initiator": "object",
    "resourcetiming_timestamp": "Int64",
    "resourcetiming_ttfb": "Int64",
    "resourcetiming_url": "object",
    "sessiondisconnect": "boolean",
    "sessiondisconnect_timestamp": "Int64",
    "sessionend": "boolean",
    "sessionend_timestamp": "Int64",
    "sessionstart_projectid": "Int64",
    "sessionstart_revid": "object",
    "sessionstart_timestamp": "Int64",
    "sessionstart_trackerversion": "object",
    "sessionstart_useragent": "object",
    "sessionstart_userbrowser": "object",
    "sessionstart_userbrowserversion": "object",
    "sessionstart_usercountry": "object",
    "sessionstart_userdevice": "object",
    "sessionstart_userdeviceheapsize": "Int64",
    "sessionstart_userdevicememorysize": "Int64",
    "sessionstart_userdevicetype": "object",
    "sessionstart_useros": "object",
    "sessionstart_userosversion": "object",
    "sessionstart_useruuid": "object",
    "setcssdata_data": "Int64",
    "setcssdata_id": "Int64",
    "setinputchecked_checked": "Int64",
    "setinputchecked_id": "Int64",
    "setinputtarget_id": "Int64",
    "setinputtarget_label": "Int64",
    "setinputvalue_id": "Int64",
    "setinputvalue_mask": "Int64",
    "setinputvalue_value": "Int64",
    "setnodeattribute_id": "Int64",
    "setnodeattribute_name": "Int64",
    "setnodeattribute_value": "Int64",
    "setnodedata_data": "Int64",
    "setnodedata_id": "Int64",
    "setnodescroll_id": "Int64",
    "setnodescroll_x": "Int64",
    "setnodescroll_y": "Int64",
    "setpagelocation_navigationstart": "Int64",
    "setpagelocation_referrer": "object",
    "setpagelocation_url": "object",
    "setpagevisibility_hidden": "boolean",
    "setviewportscroll_x": "Int64",
    "setviewportscroll_y": "Int64",
    "setviewportsize_height": "Int64",
    "setviewportsize_width": "Int64",
    "stateaction_type": "object",
    "stateactionevent_messageid": "Int64",
    "stateactionevent_timestamp": "Int64",
    "stateactionevent_type": "object",
    "timestamp_timestamp": "Int64",
    "useranonymousid_id": "object",
    "userid_id": "object",
    "vuex_mutation": "object",
    "vuex_state": "string",
    "received_at": "Int64",
    "batch_order_number": "Int64",

    #NEW
    'setnodeurlbasedattribute_id': 'Int64',
    'setnodeurlbasedattribute_name': 'string',
    'setnodeurlbasedattribute_value': 'string',
    'setnodeurlbasedattribute_baseurl': 'string',
    'setstyledata_id': 'Int64',
    'setstyledata_data': 'string',
    'setstyledata_baseurl': 'string',
    'customissue_payload': 'string',
    'customissue_name': 'string',
    'technicalinfo_value': 'string',
    'technicalinfo_type': 'string',
    'issueevent_payload': 'string',
    'issueevent_context': 'string',
    'issueevent_contextstring': 'string',
    'issueevent_type': 'string'
    "sessionid": "Int64",
    "clickevent_hesitationtime": "Int64",
    "clickevent_label": "string",
    "clickevent_messageid": "Int64",
    "clickevent_timestamp": "Int64",
    "connectioninformation_downlink": "Int64",
    "connectioninformation_type": "string",
    "consolelog_level": "string",
    "consolelog_value": "string",
    "customevent_name": "string",
    "customevent_payload": "string",
    "fetch_duration": "Int64",
    "fetch_method": "string",
    "fetch_request": "string",
    "fetch_response": "string",
    "fetch_status": "Int64",
    "fetch_timestamp": "Int64",
    "fetch_url": "string",
    "graphql_operationkind": "string",
    "graphql_operationname": "string",
    "graphql_response": "string",
    "graphql_variables": "string",
    "inputevent_label": "string",
    "inputevent_messageid": "Int64",
    "inputevent_timestamp": "Int64",
    "inputevent_value": "string",
    "inputevent_valuemasked": "boolean",
    "jsexception_message": "string",
    "jsexception_name": "string",
    "jsexception_payload": "string",
    "jsexception_metadata": "string",
    "mouseclick_id": "Int64",
    "mouseclick_hesitationtime": "Int64",
    "mouseclick_label": "string",
    "networkrequest_type": "string",
    "networkrequest_method": "string",
    "networkrequest_url": "string",
    "networkrequest_request": "string",
    "networkrequest_response": "string",
    "networkrequest_status": "Int64",
    "networkrequest_timestamp": "Int64",
    "networkrequest_duration": "Int64",
    "pageevent_domcontentloadedeventend": "Int64",
    "pageevent_domcontentloadedeventstart": "Int64",
    "pageevent_firstcontentfulpaint": "Int64",
    "pageevent_firstpaint": "Int64",
    "pageevent_loaded": "boolean",
    "pageevent_loadeventend": "Int64",
    "pageevent_loadeventstart": "Int64",
    "pageevent_messageid": "Int64",
    "pageevent_referrer": "string",
    "pageevent_requeststart": "Int64",
    "pageevent_responseend": "Int64",
    "pageevent_responsestart": "Int64",
    "pageevent_speedindex": "Int64",
    "pageevent_timestamp": "Int64",
    "pageevent_url": "string",
    "sessionend_timestamp": "Int64",
    "sessionend_encryption_key": "string",
    "sessionstart_projectid": "Int64",
    "sessionstart_revid": "string",
    "sessionstart_timestamp": "Int64",
    "sessionstart_trackerversion": "string",
    "sessionstart_useragent": "string",
    "sessionstart_userbrowser": "string",
    "sessionstart_userbrowserversion": "string",
    "sessionstart_usercountry": "string",
    "sessionstart_userdevice": "string",
    "sessionstart_userdeviceheapsize": "Int64",
    "sessionstart_userdevicememorysize": "Int64",
    "sessionstart_userdevicetype": "string",
    "sessionstart_useros": "string",
    "sessionstart_userosversion": "string",
    "sessionstart_useruuid": "string",
    "setpagelocation_navigationstart": "Int64",
    "setpagelocation_referrer": "string",
    "setpagelocation_url": "string",
    "issueevent_message_id": "Int64",
    "issueevent_timestamp": "Int64",
    "issueevent_type": "string",
    "issueevent_context_string": "string",
    "issueevent_context": "string",
    "issueevent_payload": "string",
    "issueevent_url": "string",
    "customissue_name": "string",
    "customissue_payload": "string",
    "received_at": "Int64",
    "batch_order_number": "Int64",
}
dtypes_sessions = {'sessionid': 'Int64',
    'user_agent': 'string',
    'user_browser': 'string',
    'user_browser_version': 'string',
    'user_country': 'string',
    'user_device': 'string',
    'user_device_heap_size': 'Int64',
    'user_device_memory_size': 'Int64',
    'user_device_type': 'string',
    'user_os': 'string',
    'user_os_version': 'string',
    'user_uuid': 'string',
    'connection_effective_bandwidth': 'Int64',
    'connection_type': 'string',
    'metadata_key': 'string',
    'metadata_value': 'string',
|
||||
'referrer': 'string',
|
||||
'user_anonymous_id': 'string',
|
||||
'user_id': 'string',
|
||||
'session_start_timestamp': 'Int64',
|
||||
'session_end_timestamp': 'Int64',
|
||||
'session_duration': 'Int64',
|
||||
'first_contentful_paint': 'Int64',
|
||||
'speed_index': 'Int64',
|
||||
'visually_complete': 'Int64',
|
||||
'timing_time_to_interactive': 'Int64',
|
||||
'avg_cpu': 'Int64',
|
||||
'avg_fps': 'Int64',
|
||||
'max_cpu': 'Int64',
|
||||
'max_fps': 'Int64',
|
||||
'max_total_js_heap_size': 'Int64',
|
||||
'max_used_js_heap_size': 'Int64',
|
||||
'js_exceptions_count': 'Int64',
|
||||
'long_tasks_total_duration': 'Int64',
|
||||
'long_tasks_max_duration': 'Int64',
|
||||
'long_tasks_count': 'Int64',
|
||||
'inputs_count': 'Int64',
|
||||
'clicks_count': 'Int64',
|
||||
'issues_count': 'Int64',
|
||||
'issues': 'object',
|
||||
'urls_count': 'Int64',
|
||||
'urls': 'object'}
|
||||
dtypes_sessions = {'sessionid': "Int64",
|
||||
'user_agent': "string",
|
||||
'user_browser': "string",
|
||||
'user_browser_version': "string",
|
||||
'user_country': "string",
|
||||
'user_device': "string",
|
||||
'user_device_heap_size': "Int64",
|
||||
'user_device_memory_size': "Int64",
|
||||
'user_device_type': "string",
|
||||
'user_os': "string",
|
||||
'user_os_version': "string",
|
||||
'user_uuid': "string",
|
||||
'connection_effective_bandwidth': "Int64",
|
||||
'connection_type': "string",
|
||||
'metadata_key': "string",
|
||||
'metadata_value': "string",
|
||||
'referrer': "string",
|
||||
'user_anonymous_id': "string",
|
||||
'user_id': "string",
|
||||
'session_start_timestamp': "Int64",
|
||||
'session_end_timestamp': "Int64",
|
||||
'session_duration': "Int64",
|
||||
'first_contentful_paint': "Int64",
|
||||
'speed_index': "Int64",
|
||||
'visually_complete': "Int64",
|
||||
'timing_time_to_interactive': "Int64",
|
||||
'avg_cpu': "Int64",
|
||||
'avg_fps': "Int64",
|
||||
'max_cpu': "Int64",
|
||||
'max_fps': "Int64",
|
||||
'max_total_js_heap_size': "Int64",
|
||||
'max_used_js_heap_size': "Int64",
|
||||
'js_exceptions_count': "Int64",
|
||||
'inputs_count': "Int64",
|
||||
'clicks_count': "Int64",
|
||||
'issues_count': "Int64",
|
||||
'urls_count': "Int64",
|
||||
}
|
||||
|
||||
if DATABASE == 'bigquery':
|
||||
dtypes_sessions['urls'] = 'string'
|
||||
dtypes_sessions['issues'] = 'string'
|
||||
dtypes_sessions['urls'] = "string"
|
||||
dtypes_sessions['issues'] = "string"
|
||||
|
||||
detailed_events_col = []
|
||||
for col in DetailedEvent.__dict__:
|
||||
|
|
@@ -360,13 +193,19 @@ def get_df_from_batch(batch, level):
            pass

    if level == 'normal':
-        df = df.astype(dtypes_events)
+        current_types = dtypes_events
    if level == 'detailed':
+        current_types = dtypes_detailed_events
        df['inputevent_value'] = None
        df['customevent_payload'] = None
-        df = df.astype(dtypes_detailed_events)
    if level == 'sessions':
-        df = df.astype(dtypes_sessions)
+        current_types = dtypes_sessions
+        df['js_exceptions_count'] = df['js_exceptions_count'].fillna(0)
+        df['inputs_count'] = df['inputs_count'].fillna(0)
+        df['clicks_count'] = df['clicks_count'].fillna(0)
+        df['issues_count'] = df['issues_count'].fillna(0)
+        df['urls_count'] = df['urls_count'].fillna(0)
+    df = df.astype(current_types)

    if DATABASE == 'clickhouse' and level == 'sessions':
        df['issues'] = df['issues'].fillna('')
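Why the counts are filled before the cast: pandas' nullable "Int64" dtype tolerates missing values, but the commit's intent is "0 when the event did not occur" rather than NULL in the warehouse. A minimal self-contained sketch of the same pattern (column name is illustrative):

import pandas as pd

df = pd.DataFrame({'clicks_count': [3, None, 7]})
df['clicks_count'] = df['clicks_count'].fillna(0)
df = df.astype({'clicks_count': 'Int64'})
print(df['clicks_count'].tolist())  # [3, 0, 7]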
@@ -374,7 +213,8 @@ def get_df_from_batch(batch, level):

    for x in df.columns:
        try:
-            if df[x].dtype == 'string':
+            if df[x].dtype == "string" or current_types[x] == "string":
                df[x] = df[x].fillna('NULL')
                df[x] = df[x].str.slice(0, 255)
                df[x] = df[x].str.replace("|", "")
        except TypeError as e:
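A plausible reading of these three transforms (the delimiter assumption is ours, not stated in the diff): missing strings become the literal 'NULL', values are truncated to 255 characters, and '|' is stripped, presumably because it serves as a field separator somewhere in the load path. A standalone sketch:

import pandas as pd

s = pd.Series(['a|b', None, 'x' * 300], dtype='string')
# regex=False treats '|' literally; the bare replace("|", "") above relies on
# pandas >= 2 defaults, where regex=False is already the default.
s = s.fillna('NULL').str.slice(0, 255).str.replace('|', '', regex=False)
print(s.tolist())  # ['ab', 'NULL', 'xxx...x' (255 chars)]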
@@ -1,13 +1,14 @@
-import os
+from decouple import config

-DATABASE = os.environ['DATABASE_NAME']
+DATABASE = config('CLOUD_SERVICE')

from db.api import DBConnection
-from db.utils import get_df_from_batch
+from db.utils import get_df_from_batch, dtypes_sessions
from db.tables import *

if DATABASE == 'redshift':
    from db.loaders.redshift_loader import transit_insert_to_redshift
    import pandas as pd
elif DATABASE == 'clickhouse':
    from db.loaders.clickhouse_loader import insert_to_clickhouse
elif DATABASE == 'pg':
@@ -21,23 +22,25 @@ else:
    raise Exception(f"{DATABASE}-database not supported")

# create tables if don't exist
-try:
-    db = DBConnection(DATABASE)
-    if DATABASE == 'pg':
-        create_tables_postgres(db)
-    if DATABASE == 'clickhouse':
-        create_tables_clickhouse(db)
-    if DATABASE == 'snowflake':
-        create_tables_snowflake(db)
-    if DATABASE == 'bigquery':
-        create_tables_bigquery()
-    if DATABASE == 'redshift':
-        create_tables_redshift(db)
-    db.engine.dispose()
-    db = None
-except Exception as e:
-    print(repr(e))
-    print("Please create the tables with scripts provided in " +
+_build_tables = config('build_tables', default=False, cast=bool)
+if _build_tables:
+    try:
+        db = DBConnection(DATABASE)
+        if DATABASE == 'pg':
+            create_tables_postgres(db)
+        if DATABASE == 'clickhouse':
+            create_tables_clickhouse(db)
+        if DATABASE == 'snowflake':
+            create_tables_snowflake(db)
+        if DATABASE == 'bigquery':
+            create_tables_bigquery()
+        if DATABASE == 'redshift':
+            create_tables_redshift(db)
+        db.engine.dispose()
+        db = None
+    except Exception as e:
+        print(repr(e))
+        print("Please create the tables with scripts provided in " +
              f"'/sql/{DATABASE}_sessions.sql' and '/sql/{DATABASE}_events.sql'")
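Table creation is now opt-in via python-decouple, so with no configuration the consumer assumes the tables already exist. A quick sketch of how the flag resolves (the variable name follows the config() call above):

from decouple import config

# Returns False unless build_tables=True is set in the environment or .env;
# only then does the service attempt CREATE TABLE on startup.
print(config('build_tables', default=False, cast=bool))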
@@ -61,3 +64,29 @@ def insert_batch(db: DBConnection, batch, table, level='normal'):

    if db.config == 'snowflake':
        insert_to_snowflake(db=db, df=df, table=table)


+def update_batch(db: DBConnection, batch, table):
+    if len(batch) == 0:
+        return
+    df = get_df_from_batch(batch, level='sessions')
+    base_query = f"UPDATE {table} SET"
+    for column_name, column_type in dtypes_sessions.items():
+        if column_name == 'sessionid':
+            continue
+        elif column_type == 'string':
+            df[column_name] = df[column_name].fillna('NULL')
+            base_query += f" {column_name} = " + "'{" + f"{column_name}" + "}',"
+        else:
+            df[column_name] = df[column_name].fillna(0)
+            base_query += f" {column_name} = " + "{" + f"{column_name}" + "},"
+    base_query = base_query[:-1] + " WHERE sessionid = {sessionid};"
+    for i in range(len(df)):
+        if db.config == 'redshift':
+            params = dict(df.iloc[i])
+            query = base_query.format(**params)
+            try:
+                db.pdredshift.exec_commit(query)
+            except Exception as e:
+                print('[ERROR] Error while executing query')
+                print(repr(e))
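To make the string templating concrete, here is what base_query renders to for a hypothetical two-column dtypes_sessions (the real column list is much longer):

# Illustrative only: string columns get quoted placeholders, numeric ones do not.
base_query = ("UPDATE connector_user_sessions SET user_id = '{user_id}',"
              " clicks_count = {clicks_count} WHERE sessionid = {sessionid};")
print(base_query.format(user_id='u-42', clicks_count=7, sessionid=123))
# UPDATE connector_user_sessions SET user_id = 'u-42', clicks_count = 7 WHERE sessionid = 123;

Note that str.format interpolation assumes the values never contain quotes; a parameterized execution would be safer, but exec_commit here is fed a finished SQL string.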
@@ -1,15 +1,10 @@
-FROM python:3.11
+FROM python:3.11-alpine

WORKDIR /usr/src/app

+ENV LIBRD_VER=2.0.2
+RUN apk update && apk add postgresql-dev gcc python3-dev musl-dev linux-headers g++ libc-dev libffi-dev make cmake py-pip build-base
+RUN apk add --no-cache --virtual .make-deps bash make wget git gcc g++ && apk add --no-cache musl-dev zlib-dev openssl zstd-dev pkgconfig libc-dev && wget https://github.com/edenhill/librdkafka/archive/v${LIBRD_VER}.tar.gz && tar -xvf v${LIBRD_VER}.tar.gz && cd librdkafka-${LIBRD_VER} && ./configure --prefix /usr && make && make install && make clean && rm -rf librdkafka-${LIBRD_VER} && rm -rf v${LIBRD_VER}.tar.gz && apk del .make-deps
COPY . .
-RUN apt update
-RUN apt-get install -y libc-dev libffi-dev gcc
-RUN apt update && apt -y install software-properties-common gcc
-RUN git clone https://github.com/edenhill/librdkafka
-RUN cd librdkafka && ./configure && make && make install && ldconfig
-
-RUN pip install -r ./deploy/requirements_bigquery.txt
-
-CMD ["python", "consumer.py"]
+RUN pip install -r deploy/requirements_bigquery.txt
+ENTRYPOINT ./entrypoint.sh
@@ -1,15 +1,10 @@
-FROM python:3.11
+FROM python:3.11-alpine

WORKDIR /usr/src/app

+ENV LIBRD_VER=2.0.2
+RUN apk update && apk add postgresql-dev gcc python3-dev musl-dev linux-headers g++ libc-dev libffi-dev make cmake py-pip build-base
+RUN apk add --no-cache --virtual .make-deps bash make wget git gcc g++ && apk add --no-cache musl-dev zlib-dev openssl zstd-dev pkgconfig libc-dev && wget https://github.com/edenhill/librdkafka/archive/v${LIBRD_VER}.tar.gz && tar -xvf v${LIBRD_VER}.tar.gz && cd librdkafka-${LIBRD_VER} && ./configure --prefix /usr && make && make install && make clean && rm -rf librdkafka-${LIBRD_VER} && rm -rf v${LIBRD_VER}.tar.gz && apk del .make-deps
COPY . .
-RUN apt update
-RUN apt-get install -y libc-dev libffi-dev gcc
-RUN apt update && apt -y install software-properties-common gcc
-RUN git clone https://github.com/edenhill/librdkafka
-RUN cd librdkafka && ./configure && make && make install && ldconfig
-
-RUN pip install -r ./deploy/requirements_clickhouse.txt
-
-CMD ["python", "consumer.py"]
+RUN pip install -r deploy/requirements_clickhouse.txt
+ENTRYPOINT ./entrypoint.sh
@@ -1,15 +1,10 @@
-FROM python:3.11
+FROM python:3.11-alpine

WORKDIR /usr/src/app

+ENV LIBRD_VER=2.0.2
+RUN apk update && apk add postgresql-dev gcc python3-dev musl-dev linux-headers g++ libc-dev libffi-dev make cmake py-pip build-base
+RUN apk add --no-cache --virtual .make-deps bash make wget git gcc g++ && apk add --no-cache musl-dev zlib-dev openssl zstd-dev pkgconfig libc-dev && wget https://github.com/edenhill/librdkafka/archive/v${LIBRD_VER}.tar.gz && tar -xvf v${LIBRD_VER}.tar.gz && cd librdkafka-${LIBRD_VER} && ./configure --prefix /usr && make && make install && make clean && rm -rf librdkafka-${LIBRD_VER} && rm -rf v${LIBRD_VER}.tar.gz && apk del .make-deps
COPY . .
-RUN apt update
-RUN apt-get install -y libc-dev libffi-dev gcc
-RUN apt update && apt -y install software-properties-common gcc
-RUN git clone https://github.com/edenhill/librdkafka
-RUN cd librdkafka && ./configure && make && make install && ldconfig
-
-RUN pip install -r ./deploy/requirements_pg.txt
-
-CMD ["python", "consumer.py"]
+RUN pip install -r deploy/requirements_pg.txt
+ENTRYPOINT ./entrypoint.sh
@@ -1,15 +1,15 @@
-FROM python:3.11
-
-WORKDIR /usr/src/app
+FROM public.ecr.aws/p1t3u8a3/connectors/redshift:base
+
+ENV CLOUD_SERVICE=redshift \
+    CONNECTION_STRING=postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{DBNAME} \
+    # Keep postgres connection
+    PG_MINCONN=3 \
+    PG_MAXCONN=10
+RUN apk add --no-cache postgresql-libs lz4-libs zstd-libs
+COPY deploy/requirements_redshift.txt .
+RUN apk add --no-cache --virtual .build-deps gcc g++ musl-dev postgresql-dev && \
+    python3 -m pip install -r requirements_redshift.txt --no-cache-dir && \
+    apk --purge del .build-deps
COPY . .
-RUN apt update
-RUN apt-get install -y libc-dev libffi-dev gcc
-RUN apt update && apt -y install software-properties-common gcc
-RUN git clone https://github.com/edenhill/librdkafka
-RUN cd librdkafka && ./configure && make && make install && ldconfig
-
-RUN pip install -r ./deploy/requirements_redshift.txt
-
-CMD ["python", "consumer.py"]
-
+ENTRYPOINT ./entrypoint.sh
ee/connectors/deploy/Dockerfile_redshift_base (new file)
@@ -0,0 +1,13 @@
+FROM amancevice/pandas:2.0.0-alpine
+
+WORKDIR /usr/src/app
+ENV LIBRD_VER=2.0.2
+WORKDIR /work
+RUN apk add --no-cache --virtual .make-deps postgresql-dev gcc python3-dev \
+    musl-dev linux-headers g++ libc-dev libffi-dev make cmake py-pip build-base \
+    bash make wget git gcc g++ musl-dev zlib-dev openssl zstd-dev pkgconfig libc-dev && \
+    wget https://github.com/edenhill/librdkafka/archive/v${LIBRD_VER}.tar.gz && \
+    tar -xvf v${LIBRD_VER}.tar.gz && cd librdkafka-${LIBRD_VER} && \
+    ./configure --prefix /usr && make && make install && make clean && \
+    cd /work && rm -rf librdkafka-${LIBRD_VER} && rm -rf v${LIBRD_VER}.tar.gz \
+    && apk del .make-deps
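Assuming the usual tagging flow, the base image referenced by Dockerfile_redshift above would be produced from this file with something like the following (the tag is an assumption inferred from that FROM line):

docker build -f deploy/Dockerfile_redshift_base -t public.ecr.aws/p1t3u8a3/connectors/redshift:base .

Baking librdkafka and pandas into a reusable base keeps the per-connector builds small, since only the Python requirements change between them.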
@@ -1,15 +1,10 @@
-FROM python:3.11
+FROM python:3.11-alpine

WORKDIR /usr/src/app

+ENV LIBRD_VER=2.0.2
+RUN apk update && apk add postgresql-dev gcc python3-dev musl-dev linux-headers g++ libc-dev libffi-dev make cmake py-pip build-base
+RUN apk add --no-cache --virtual .make-deps bash make wget git gcc g++ && apk add --no-cache musl-dev zlib-dev openssl zstd-dev pkgconfig libc-dev && wget https://github.com/edenhill/librdkafka/archive/v${LIBRD_VER}.tar.gz && tar -xvf v${LIBRD_VER}.tar.gz && cd librdkafka-${LIBRD_VER} && ./configure --prefix /usr && make && make install && make clean && rm -rf librdkafka-${LIBRD_VER} && rm -rf v${LIBRD_VER}.tar.gz && apk del .make-deps
COPY . .
-RUN apt update
-RUN apt-get install -y libc-dev libffi-dev gcc
-RUN apt update && apt -y install software-properties-common gcc
-RUN git clone https://github.com/edenhill/librdkafka
-RUN cd librdkafka && ./configure && make && make install && ldconfig
-
-RUN pip install -r ./deploy/requirements_snowflake.txt
-
-CMD ["python", "consumer.py"]
+RUN pip install -r deploy/requirements_snowflake.txt
+ENTRYPOINT ./entrypoint.sh
@@ -1,14 +1,14 @@
certifi==2022.09.24
-chardet==5.0.0
-clickhouse-driver==0.2.4
-clickhouse-sqlalchemy==0.2.2
+chardet==5.1.0
idna==3.4
-confluent-kafka
-psycopg2-binary==2.9.3
+confluent-kafka==2.0.2
+psycopg2-binary==2.9.6
python-decouple==3.8
pytz==2022.6
requests==2.28.1
-SQLAlchemy==1.4.43
+SQLAlchemy==2.0.8
tzlocal
-urllib3==1.26.12
+urllib3==1.26.15
sqlalchemy-redshift
redshift-connector
pandas-redshift
+PyYAML
ee/connectors/entrypoint.sh (new executable file)
@@ -0,0 +1,2 @@
+echo "[INFO] Starting service"
+python -u consumer_async.py
@@ -19,27 +19,10 @@ def handle_normal_message(message: Message) -> Optional[Event]:
        return n

-    if isinstance(message, CustomEvent):
-        n.customevent_messageid = message.message_id
-        n.customevent_name = message.name
-        n.customevent_timestamp = message.timestamp
-        n.customevent_payload = message.payload
-        return n
-
-    if isinstance(message, ErrorEvent):
-        n.errorevent_message = message.message
-        n.errorevent_messageid = message.message_id
-        n.errorevent_name = message.name
-        n.errorevent_payload = message.payload
-        n.errorevent_source = message.source
-        n.errorevent_timestamp = message.timestamp
-        return n
-
-    if isinstance(message, JSException):
-        n.jsexception_name = message.name
-        n.jsexception_payload = message.payload
-        n.jsexception_message = message.message
-        return n
-
    if isinstance(message, Metadata):
        n.metadata_key = message.key
        n.metadata_value = message.value
@@ -52,11 +35,15 @@ def handle_normal_message(message: Message) -> Optional[Event]:
        n.mouseclick_selector = message.selector
        return n

-    if isinstance(message, MouseClickDepricated):
-        n.mouseclick_hesitationtime = message.hesitation_time
-        n.mouseclick_id = message.id
-        n.mouseclick_label = message.label
-        n.mouseclick_selector = ''
+    if isinstance(message, NetworkRequest):
+        n.networkrequest_type = message.type
+        n.networkrequest_method = message.method
+        n.networkrequest_url = message.url
+        n.networkrequest_request = message.request
+        n.networkrequest_response = message.response
+        n.networkrequest_status = message.status
+        n.networkrequest_timestamp = message.timestamp
+        n.networkrequest_duration = message.duration
        return n

    if isinstance(message, PageEvent):
@@ -74,11 +61,6 @@ def handle_normal_message(message: Message) -> Optional[Event]:
        n.pagerendertiming_visuallycomplete = message.visually_complete
        return n

-    if isinstance(message, RawCustomEvent):
-        n.rawcustomevent_name = message.name
-        n.rawcustomevent_payload = message.payload
-        return n
-
    if isinstance(message, SetViewportSize):
        n.setviewportsize_height = message.height
        n.setviewportsize_width = message.width
@@ -100,9 +82,10 @@ def handle_normal_message(message: Message) -> Optional[Event]:
        n.issueevent_messageid = message.message_id
        n.issueevent_timestamp = message.timestamp
        n.issueevent_type = message.type
-        n.issueevent_contextstring = message.context_string
+        n.issueevent_context_string = message.context_string
        n.issueevent_context = message.context
        n.issueevent_payload = message.payload
+        n.issueevent_url = message.url
        return n

    if isinstance(message, CustomIssue):
@@ -147,14 +130,18 @@ def handle_session(n: Session, message: Message) -> Optional[Session]:
        return n

    if isinstance(message, BatchMetadata):
-        n.batchmeta_page_no = message.page_no
-        n.batchmeta_first_index = message.first_index
-        n.batchmeta_timestamp = message.timestamp
+        n.batchmetadata_version = message.version
+        n.batchmetadata_page_no = message.page_no
+        n.batchmetadata_first_index = message.first_index
+        n.batchmetadata_timestamp = message.timestamp
+        n.batchmetadata_location = message.location
        return n

    if isinstance(message, PartitionedMessage):
-        n.part_no = message.part_no
-        n.part_total = message.part_total
+        n.partitionedmessage_part_no = message.part_no
+        n.partitionedmessage_part_total = message.part_total
        return n

    # if isinstance(message, IOSBatchMeta):
    #     n.iosbatchmeta_page_no = message.page_no
@@ -182,10 +169,10 @@ def handle_session(n: Session, message: Message) -> Optional[Session]:
            n.urls_count += 1
        except TypeError:
            n.urls_count = 1
-        try:
-            n.urls.append(message.url)
-        except AttributeError:
-            n.urls = [message.url]
+        #try:
+        #    n.urls.append(message.url)
+        #except AttributeError:
+        #    n.urls = [message.url]
        return n

    if isinstance(message, PerformanceTrackAggr):
@@ -205,30 +192,30 @@ def handle_session(n: Session, message: Message) -> Optional[Session]:
        n.user_anonymous_id = message.id
        return n

-    if isinstance(message, JSException):
+    if isinstance(message, JSException) or isinstance(message, JSExceptionDeprecated):
        try:
            n.js_exceptions_count += 1
        except TypeError:
            n.js_exceptions_count = 1
        return n

-    if isinstance(message, LongTask):
-        try:
-            n.long_tasks_total_duration += message.duration
-        except TypeError:
-            n.long_tasks_total_duration = message.duration
+    #if isinstance(message, LongTask):
+    #    try:
+    #        n.long_tasks_total_duration += message.duration
+    #    except TypeError:
+    #        n.long_tasks_total_duration = message.duration

-        try:
-            if n.long_tasks_max_duration > message.duration:
-                n.long_tasks_max_duration = message.duration
-        except TypeError:
-            n.long_tasks_max_duration = message.duration
+    #    try:
+    #        if n.long_tasks_max_duration > message.duration:
+    #            n.long_tasks_max_duration = message.duration
+    #    except TypeError:
+    #        n.long_tasks_max_duration = message.duration

-        try:
-            n.long_tasks_count += 1
-        except TypeError:
-            n.long_tasks_count = 1
-        return n
+    #    try:
+    #        n.long_tasks_count += 1
+    #    except TypeError:
+    #        n.long_tasks_count = 1
+    #    return n

    if isinstance(message, InputEvent):
        try:
@@ -239,58 +226,36 @@ def handle_session(n: Session, message: Message) -> Optional[Session]:

    if isinstance(message, MouseClick):
        try:
-            n.inputs_count += 1
+            n.clicks_count += 1
        except TypeError:
-            n.inputs_count = 1
+            n.clicks_count = 1
        return n

-    if isinstance(message, MouseClickDepricated):
-        try:
-            n.inputs_count += 1
-        except TypeError:
-            n.inputs_count = 1
-        return n
-
-    if isinstance(message, IssueEvent):
+    if isinstance(message, IssueEvent) or isinstance(message, IssueEventDeprecated):
        try:
            n.issues_count += 1
        except TypeError:
            n.issues_count = 1

-        try:
-            n.issues.append(message.type)
-        except AttributeError:
-            n.issues = [message.type]
+        #try:
+        #    n.issues.append(message.type)
+        #except AttributeError:
+        #    n.issues = [message.type]
        return n


def handle_message(message: Message) -> Optional[DetailedEvent]:
    n = DetailedEvent()

-    if isinstance(message, SessionEnd):
-        n.sessionend = True
-        n.sessionend_timestamp = message.timestamp
-        return n
+    # if isinstance(message, SessionEnd):
+    #     n.sessionend = True
+    #     n.sessionend_timestamp = message.timestamp
+    #     return n

    if isinstance(message, Timestamp):
        n.timestamp_timestamp = message.timestamp
        return n

    if isinstance(message, SessionDisconnect):
        n.sessiondisconnect = True
        n.sessiondisconnect_timestamp = message.timestamp
        return n

    if isinstance(message, SessionStart):
        n.sessionstart_trackerversion = message.tracker_version
        n.sessionstart_revid = message.rev_id
@@ -352,35 +317,27 @@ def handle_message(message: Message) -> Optional[DetailedEvent]:
        n.pagerendertiming_timetointeractive = message.time_to_interactive
        return n

-    if isinstance(message, ResourceTiming):
-        n.resourcetiming_timestamp = message.timestamp
-        n.resourcetiming_duration = message.duration
-        n.resourcetiming_ttfb = message.ttfb
-        n.resourcetiming_headersize = message.header_size
-        n.resourcetiming_encodedbodysize = message.encoded_body_size
-        n.resourcetiming_decodedbodysize = message.decoded_body_size
-        n.resourcetiming_url = message.url
-        n.resourcetiming_initiator = message.initiator
+    # if isinstance(message, ResourceTiming):
+    #     n.resourcetiming_timestamp = message.timestamp
+    #     n.resourcetiming_duration = message.duration
+    #     n.resourcetiming_ttfb = message.ttfb
+    #     n.resourcetiming_headersize = message.header_size
+    #     n.resourcetiming_encodedbodysize = message.encoded_body_size
+    #     n.resourcetiming_decodedbodysize = message.decoded_body_size
+    #     n.resourcetiming_url = message.url
+    #     n.resourcetiming_initiator = message.initiator
+    #     return n

    if isinstance(message, IntegrationEvent):
        n.integrationevent_timestamp = message.timestamp
        n.integrationevent_source = message.source
        n.integrationevent_name = message.name
        n.integrationevent_message = message.message
        n.integrationevent_payload = message.payload
        return n

-    if isinstance(message, JSException):
-        n.jsexception_name = message.name
-        n.jsexception_message = message.message
-        n.jsexception_payload = message.payload
-        return n
-
-    if isinstance(message, RawErrorEvent):
-        n.rawerrorevent_timestamp = message.timestamp
-        n.rawerrorevent_source = message.source
-        n.rawerrorevent_name = message.name
-        n.rawerrorevent_message = message.message
-        n.rawerrorevent_payload = message.payload
-        return n
-
-    if isinstance(message, RawCustomEvent):
-        n.rawcustomevent_name = message.name
-        n.rawcustomevent_payload = message.payload
-        return n
-
    if isinstance(message, UserID):
        n.userid_id = message.id
@@ -402,14 +359,78 @@ def handle_message(message: Message) -> Optional[DetailedEvent]:
        return n

    if isinstance(message, BatchMetadata):
-        n.batchmeta_page_no = message.page_no
-        n.batchmeta_first_index = message.first_index
-        n.batchmeta_timestamp = message.timestamp
+        n.batchmetadata_version = message.version
+        n.batchmetadata_page_no = message.page_no
+        n.batchmetadata_first_index = message.first_index
+        n.batchmetadata_timestamp = message.timestamp
+        n.batchmetadata_location = message.location
        return n

    if isinstance(message, PartitionedMessage):
-        n.part_no = message.part_no
-        n.part_total = message.part_total
+        n.partitionedmessage_part_no = message.part_no
+        n.partitionedmessage_part_total = message.part_total
        return n

+    if isinstance(message, InputChange):
+        n.inputchange_id = message.id
+        n.inputchange_value = message.value
+        n.inputchange_value_masked = message.value_masked
+        n.inputchange_label = message.label
+        n.inputchange_hesitation_time = message.hesitation_time
+        n.inputchange_input_duration = message.input_duration
+        return n
+
+    if isinstance(message, SelectionChange):
+        n.selectionchange_selection_start = message.selection_start
+        n.selectionchange_selection_end = message.selection_end
+        n.selectionchange_selection = message.selection
+        return n
+
+    if isinstance(message, MouseThrashing):
+        n.mousethrashing_timestamp = message.timestamp
+        return n
+
+    if isinstance(message, UnbindNodes):
+        n.unbindnodes_total_removed_percent = message.total_removed_percent
+        return n
+
+    if isinstance(message, ResourceTiming):
+        n.resourcetiming_timestamp = message.timestamp
+        n.resourcetiming_duration = message.duration
+        n.resourcetiming_ttfb = message.ttfb
+        n.resourcetiming_header_size = message.header_size
+        n.resourcetiming_encoded_body_size = message.encoded_body_size
+        n.resourcetiming_decoded_body_size = message.decoded_body_size
+        n.resourcetiming_url = message.url
+        n.resourcetiming_initiator = message.initiator
+        n.resourcetiming_transferred_size = message.transferred_size
+        n.resourcetiming_cached = message.cached
+        return n
+
+    if isinstance(message, IssueEvent):
+        n.issueevent_message_id = message.message_id
+        n.issueevent_timestamp = message.timestamp
+        n.issueevent_type = message.type
+        n.issueevent_context_string = message.context_string
+        n.issueevent_context = message.context
+        n.issueevent_payload = message.payload
+        n.issueevent_url = message.url
+        return n
+
+    if isinstance(message, SessionEnd):
+        n.sessionend_timestamp = message.timestamp
+        n.sessionend_encryption_key = message.encryption_key
+        return n
+
+    if isinstance(message, SessionSearch):
+        n.sessionsearch_timestamp = message.timestamp
+        n.sessionsearch_partition = message.partition
+        return n
+
    if isinstance(message, PerformanceTrack):
        n.performancetrack_frames = message.frames
@@ -466,44 +487,73 @@ def handle_message(message: Message) -> Optional[DetailedEvent]:
        n.inputevent_label = message.label
        return n

    if isinstance(message, ClickEvent):
        n.clickevent_messageid = message.message_id
        n.clickevent_timestamp = message.timestamp
        n.clickevent_hesitationtime = message.hesitation_time
        n.clickevent_label = message.label
        return n

    if isinstance(message, ErrorEvent):
        n.errorevent_messageid = message.message_id
        n.errorevent_timestamp = message.timestamp
        n.errorevent_source = message.source
        n.errorevent_name = message.name
        n.errorevent_message = message.message
        n.errorevent_payload = message.payload
        return n

    if isinstance(message, ResourceEvent):
        n.resourceevent_messageid = message.message_id
        n.resourceevent_timestamp = message.timestamp
        n.resourceevent_duration = message.duration
        n.resourceevent_ttfb = message.ttfb
        n.resourceevent_headersize = message.header_size
        n.resourceevent_encodedbodysize = message.encoded_body_size
        n.resourceevent_decodedbodysize = message.decoded_body_size
        n.resourceevent_url = message.url
        n.resourceevent_type = message.type
        n.resourceevent_success = message.success
        n.resourceevent_method = message.method
        n.resourceevent_status = message.status
        return n

    if isinstance(message, CustomEvent):
        n.customevent_messageid = message.message_id
        n.customevent_timestamp = message.timestamp
        n.customevent_name = message.name
        n.customevent_payload = message.payload
        return n

    if isinstance(message, LoadFontFace):
        n.loadfontface_parent_id = message.parent_id
        n.loadfontface_family = message.family
        n.loadfontface_source = message.source
        n.loadfontface_descriptors = message.descriptors
        return n

    if isinstance(message, SetNodeFocus):
        n.setnodefocus_id = message.id
        return n

    if isinstance(message, AdoptedSSReplaceURLBased):
        n.adoptedssreplaceurlbased_sheet_id = message.sheet_id
        n.adoptedssreplaceurlbased_text = message.text
        n.adoptedssreplaceurlbased_base_url = message.base_url
        return n

    if isinstance(message, AdoptedSSReplace):
        n.adoptedssreplace_sheet_id = message.sheet_id
        n.adoptedssreplace_text = message.text
        return n

    if isinstance(message, AdoptedSSInsertRuleURLBased):
        n.adoptedssinsertruleurlbased_sheet_id = message.sheet_id
        n.adoptedssinsertruleurlbased_rule = message.rule
        n.adoptedssinsertruleurlbased_index = message.index
        n.adoptedssinsertruleurlbased_base_url = message.base_url
        return n

    if isinstance(message, AdoptedSSInsertRule):
        n.adoptedssinsertrule_sheet_id = message.sheet_id
        n.adoptedssinsertrule_rule = message.rule
        n.adoptedssinsertrule_index = message.index
        return n

    if isinstance(message, AdoptedSSDeleteRule):
        n.adoptedssdeleterule_sheet_id = message.sheet_id
        n.adoptedssdeleterule_index = message.index
        return n

    if isinstance(message, AdoptedSSAddOwner):
        n.adoptedssaddowner_sheet_id = message.sheet_id
        n.adoptedssaddowner_id = message.id
        return n

    if isinstance(message, AdoptedSSRemoveOwner):
        n.adoptedssremoveowner_sheet_id = message.sheet_id
        n.adoptedssremoveowner_id = message.id
        return n

    if isinstance(message, JSException):
        n.jsexception_name = message.name
        n.jsexception_message = message.message
        n.jsexception_payload = message.payload
        n.jsexception_metadata = message.metadata
        return n

    if isinstance(message, Zustand):
        n.zustand_mutation = message.mutation
        n.zustand_state = message.state
        return n

    # if isinstance(message, CreateDocument):
    #     n.createdocument = True
    #     return n
@@ -542,15 +592,10 @@ def handle_message(message: Message) -> Optional[DetailedEvent]:
        n.fetch_duration = message.duration
        return n

-    if isinstance(message, FetchEvent):
-        n.fetch_event_message_id = message.message_id
-        n.fetch_event_timestamp = message.timestamp
-        n.fetch_event_method = message.method
-        n.fetch_event_url = message.url
-        n.fetch_event_request = message.request
-        n.fetch_event_response = message.response
-        n.fetch_event_status = message.status
-        n.fetch_event_duration = message.duration
+    if isinstance(message, SetNodeAttributeDict):
+        n.setnodeattributedict_id = message.id
+        n.setnodeattributedict_name_key = message.name_key
+        n.setnodeattributedict_value_key = message.value_key
        return n

    if isinstance(message, Profiler):
@@ -567,16 +612,6 @@ def handle_message(message: Message) -> Optional[DetailedEvent]:
        n.graphql_response = message.response
        return n

-    if isinstance(message, GraphQLEvent):
-        n.graphqlevent_messageid = message.message_id
-        n.graphqlevent_timestamp = message.timestamp
-        n.graphqlevent_name = message.name
-        return n
-
-    if isinstance(message, DomDrop):
-        n.domdrop_timestamp = message.timestamp
-        return n
-
    if isinstance(message, MouseClick):
        n.mouseclick_id = message.id
        n.mouseclick_hesitationtime = message.hesitation_time
@@ -584,13 +619,6 @@ def handle_message(message: Message) -> Optional[DetailedEvent]:
        n.mouseclick_selector = message.selector
        return n

-    if isinstance(message, MouseClickDepricated):
-        n.mouseclick_id = message.id
-        n.mouseclick_hesitationtime = message.hesitation_time
-        n.mouseclick_label = message.label
-        n.mouseclick_selector = ''
-        return n
-
    if isinstance(message, SetPageLocation):
        n.setpagelocation_url = message.url
        n.setpagelocation_referrer = message.referrer
@@ -612,27 +640,15 @@ def handle_message(message: Message) -> Optional[DetailedEvent]:
        n.longtasks_containername = message.container_name
        return n

-    if isinstance(message, SetNodeURLBasedAttribute):
-        n.setnodeurlbasedattribute_id = message.id
-        n.setnodeurlbasedattribute_name = message.name
-        n.setnodeurlbasedattribute_value = message.value
-        n.setnodeurlbasedattribute_baseurl = message.base_url
-        return n
-
-    if isinstance(message, SetStyleData):
-        n.setstyledata_id = message.id
-        n.setstyledata_data = message.data
-        n.setstyledata_baseurl = message.base_url
-        return n
-
-    if isinstance(message, IssueEvent):
-        n.issueevent_messageid = message.message_id
-        n.issueevent_timestamp = message.timestamp
-        n.issueevent_type = message.type
-        n.issueevent_contextstring = message.context_string
-        n.issueevent_context = message.context
-        n.issueevent_payload = message.payload
-        return n
+    #if isinstance(message, IssueEvent):
+    #    n.issueevent_message_id = message.message_id
+    #    n.issueevent_timestamp = message.timestamp
+    #    n.issueevent_type = message.type
+    #    n.issueevent_context_string = message.context_string
+    #    n.issueevent_context = message.context
+    #    n.issueevent_payload = message.payload
+    #    n.issueevent_url = message.url
+    #    return n

    if isinstance(message, TechnicalInfo):
        n.technicalinfo_type = message.type
@@ -644,10 +660,6 @@ def handle_message(message: Message) -> Optional[DetailedEvent]:
        n.customissue_payload = message.payload
        return n

-    if isinstance(message, PageClose):
-        n.pageclose = True
-        return n
-
    if isinstance(message, AssetCache):
        n.asset_cache_url = message.url
        return n
@@ -677,7 +689,7 @@ def handle_message(message: Message) -> Optional[DetailedEvent]:
        return n

    if isinstance(message, IOSBatchMeta):
        n.iosbatchmeta_page_no = message.page_no
        n.iosbatchmeta_lenght = message.length
        n.iosbatchmeta_first_index = message.first_index
        n.iosbatchmeta_timestamp = message.timestamp
        return n
@@ -34,6 +34,16 @@ class Codec:
            s += 7
            i += 1

+    @staticmethod
+    def read_size(reader: io.BytesIO):
+        size = 0
+        for i in range(3):
+            b = reader.read(1)
+            num = int.from_bytes(b, "big", signed=False)
+            size += num << (8*i)
+        return size
+
    @staticmethod
    def read_int(reader: io.BytesIO) -> int:
        """
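read_size reads three bytes and assembles them as a little-endian unsigned integer (least-significant byte first), which is what the shifting by 8*i accomplishes. A standalone check, with made-up bytes:

import io

buf = io.BytesIO(bytes([0x01, 0x02, 0x00]))  # little-endian encoding of 0x0201
size = 0
for i in range(3):
    size += int.from_bytes(buf.read(1), "big", signed=False) << (8 * i)
assert size == 0x0201  # 513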
@@ -57,7 +67,11 @@ class Codec:
    @staticmethod
    def read_string(reader: io.BytesIO) -> str:
        length = Codec.read_uint(reader)
-        s = reader.read(length)
+        try:
+            s = reader.read(length)
+        except Exception as e:
+            print(f'Error while reading string of length {length}')
+            raise Exception(e)
        try:
            return s.decode("utf-8", errors="replace").replace("\x00", "\uFFFD")
        except UnicodeDecodeError:
@@ -71,7 +71,7 @@ class CreateDocument(Message):
    __id__ = 7

    def __init__(self, ):
        pass


class CreateElementNode(Message):
@@ -7,6 +7,9 @@ import io

class MessageCodec(Codec):

+    def __init__(self, msg_selector: List[int] = list()):
+        self.msg_selector = msg_selector
+
    def read_message_id(self, reader: io.BytesIO) -> int:
        """
        Read and return the first byte where the message id is encoded
@@ -46,27 +49,41 @@ class MessageCodec(Codec):
    def decode_detailed(self, b: bytes) -> List[Message]:
        reader = io.BytesIO(b)
        messages_list = list()
-        messages_list.append(self.handler(reader, 0))
+        try:
+            messages_list.append(self.handler(reader, 0))
+        except IndexError:
+            print('[WARN] Broken batch')
+            return list()
        if isinstance(messages_list[0], BatchMeta):
            # Old BatchMeta
            mode = 0
        elif isinstance(messages_list[0], BatchMetadata):
            # New BatchMeta
-            mode = 1
+            if messages_list[0].version == 0:
+                mode = 0
+            else:
+                mode = 1
        else:
            return messages_list
        while True:
            try:
-                messages_list.append(self.handler(reader, mode))
+                msg_decoded = self.handler(reader, mode)
+                if msg_decoded is not None:
+                    messages_list.append(msg_decoded)
            except IndexError:
                break
        return messages_list

    def handler(self, reader: io.BytesIO, mode=0) -> Message:
        message_id = self.read_message_id(reader)
+        #print(f'[INFO-context] Current mode {mode}')
+        #print(f'[INFO] Currently processing message type {message_id}')
        if mode == 1:
-            # We skip the three bytes representing the length of message. It can be used to skip unwanted messages
-            reader.read(3)
+            # We read the three bytes representing the length of message. It can be used to skip unwanted messages
+            r_size = self.read_size(reader)
+            if message_id not in self.msg_selector:
+                reader.read(r_size)
+                return None
            return self.read_head_message(reader, message_id)
        elif mode == 0:
            # Old format with no bytes for message length
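A minimal usage sketch of the selector mechanism (the ids below are placeholders; the real values are the __id__ fields defined in msgcodec/messages.py):

# Keep only the message types we care about; everything else is skipped
# cheaply by consuming its 3-byte length prefix instead of being decoded.
codec = MessageCodec(msg_selector=[80, 81])
# messages = codec.decode_detailed(raw_kafka_value)

Note the version-0 fallback: a BatchMetadata with version == 0 has no per-message length prefixes, so the codec drops back to the old sequential read path (mode 0) where skipping is not possible.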
@@ -1,52 +1,31 @@
CREATE TABLE IF NOT EXISTS connector_events
(
sessionid BIGINT,
-connectioninformation_downlink BIGINT,
-connectioninformation_type VARCHAR(300),
-consolelog_level VARCHAR(300),
-consolelog_value VARCHAR(300),
-customevent_messageid BIGINT,
-customevent_name VARCHAR(300),
-customevent_payload VARCHAR(300),
-customevent_timestamp BIGINT,
-errorevent_message VARCHAR(300),
-errorevent_messageid BIGINT,
-errorevent_name VARCHAR(300),
-errorevent_payload VARCHAR(300),
-errorevent_source VARCHAR(300),
-errorevent_timestamp BIGINT,
-jsexception_message VARCHAR(300),
-jsexception_name VARCHAR(300),
-jsexception_payload VARCHAR(300),
-metadata_key VARCHAR(300),
-metadata_value VARCHAR(300),
-mouseclick_id BIGINT,
-mouseclick_hesitationtime BIGINT,
-mouseclick_label VARCHAR(300),
-pageevent_firstcontentfulpaint BIGINT,
-pageevent_firstpaint BIGINT,
-pageevent_messageid BIGINT,
-pageevent_referrer VARCHAR(300),
-pageevent_speedindex BIGINT,
-pageevent_timestamp BIGINT,
-pageevent_url VARCHAR(300),
-pagerendertiming_timetointeractive BIGINT,
-pagerendertiming_visuallycomplete BIGINT,
-rawcustomevent_name VARCHAR(300),
-rawcustomevent_payload VARCHAR(300),
-setviewportsize_height BIGINT,
-setviewportsize_width BIGINT,
-timestamp_timestamp BIGINT,
-user_anonymous_id VARCHAR(300),
-user_id VARCHAR(300),
-issueevent_messageid BIGINT,
+consolelog_level VARCHAR(5000),
+consolelog_value VARCHAR(5000),
+customevent_name VARCHAR(5000),
+customevent_payload VARCHAR(5000),
+jsexception_message VARCHAR(5000),
+jsexception_name VARCHAR(5000),
+jsexception_payload VARCHAR(5000),
+jsexception_metadata VARCHAR(5000),
+networkrequest_type VARCHAR(5000),
+networkrequest_method VARCHAR(5000),
+networkrequest_url VARCHAR(5000),
+networkrequest_request VARCHAR(5000),
+networkrequest_response VARCHAR(5000),
+networkrequest_status BIGINT,
+networkrequest_timestamp BIGINT,
+networkrequest_duration BIGINT,
+issueevent_message_id BIGINT,
issueevent_timestamp BIGINT,
-issueevent_type VARCHAR(300),
-issueevent_contextstring VARCHAR(300),
-issueevent_context VARCHAR(300),
-issueevent_payload VARCHAR(300),
-customissue_name VARCHAR(300),
-customissue_payload VARCHAR(300),
+issueevent_type VARCHAR(5000),
+issueevent_context_string VARCHAR(5000),
+issueevent_context VARCHAR(5000),
+issueevent_payload VARCHAR(5000),
+issueevent_url VARCHAR(5000),
+customissue_name VARCHAR(5000),
+customissue_payload VARCHAR(5000),
received_at BIGINT,
batch_order_number BIGINT
);
@ -1,238 +1,91 @@
|
|||
CREATE TABLE IF NOT EXISTS connector_events_detailed
|
||||
(
|
||||
sessionid BIGINT,
|
||||
clickevent_hesitationtime BIGINT,
|
||||
clickevent_label VARCHAR(300),
|
||||
clickevent_messageid BIGINT,
|
||||
clickevent_timestamp BIGINT,
|
||||
connectioninformation_downlink BIGINT,
|
||||
connectioninformation_type VARCHAR(300),
|
||||
consolelog_level VARCHAR(300),
|
||||
consolelog_value VARCHAR(300),
|
||||
cpuissue_duration BIGINT,
|
||||
cpuissue_rate BIGINT,
|
||||
cpuissue_timestamp BIGINT,
|
||||
createdocument BOOLEAN,
|
||||
createelementnode_id BIGINT,
|
||||
createelementnode_parentid BIGINT,
|
||||
cssdeleterule_index BIGINT,
|
||||
cssdeleterule_stylesheetid BIGINT,
|
||||
cssinsertrule_index BIGINT,
|
||||
cssinsertrule_rule VARCHAR(300),
|
||||
cssinsertrule_stylesheetid BIGINT,
|
||||
customevent_messageid BIGINT,
|
||||
customevent_name VARCHAR(300),
|
||||
customevent_payload VARCHAR(300),
|
||||
customevent_timestamp BIGINT,
|
||||
domdrop_timestamp BIGINT,
|
||||
errorevent_message VARCHAR(300),
|
||||
errorevent_messageid BIGINT,
|
||||
errorevent_name VARCHAR(300),
|
||||
errorevent_payload VARCHAR(300),
|
||||
errorevent_source VARCHAR(300),
|
||||
errorevent_timestamp BIGINT,
|
||||
fetch_duration BIGINT,
|
||||
fetch_method VARCHAR(300),
|
||||
fetch_request VARCHAR(300),
|
||||
fetch_response VARCHAR(300),
|
||||
fetch_status BIGINT,
|
||||
fetch_timestamp BIGINT,
|
||||
fetch_url VARCHAR(300),
|
||||
graphql_operationkind VARCHAR(300),
|
||||
graphql_operationname VARCHAR(300),
|
||||
graphql_response VARCHAR(300),
|
||||
graphql_variables VARCHAR(300),
|
||||
graphqlevent_messageid BIGINT,
|
||||
graphqlevent_name VARCHAR(300),
|
||||
graphqlevent_timestamp BIGINT,
|
||||
inputevent_label VARCHAR(300),
|
||||
inputevent_messageid BIGINT,
|
||||
inputevent_timestamp BIGINT,
|
||||
inputevent_value VARCHAR(300),
|
||||
inputevent_valuemasked BOOLEAN,
|
||||
jsexception_message VARCHAR(300),
|
||||
jsexception_name VARCHAR(300),
|
||||
jsexception_payload VARCHAR(300),
|
||||
memoryissue_duration BIGINT,
|
||||
memoryissue_rate BIGINT,
|
||||
memoryissue_timestamp BIGINT,
|
||||
metadata_key VARCHAR(300),
|
||||
metadata_value VARCHAR(300),
|
||||
mobx_payload VARCHAR(300),
|
||||
mobx_type VARCHAR(300),
|
||||
mouseclick_id BIGINT,
|
||||
mouseclick_hesitationtime BIGINT,
|
||||
mouseclick_label VARCHAR(300),
|
||||
mousemove_x BIGINT,
|
||||
mousemove_y BIGINT,
|
||||
movenode_id BIGINT,
|
||||
movenode_index BIGINT,
|
||||
movenode_parentid BIGINT,
|
||||
ngrx_action VARCHAR(300),
|
||||
ngrx_duration BIGINT,
|
||||
ngrx_state VARCHAR(300),
|
||||
otable_key VARCHAR(300),
|
||||
otable_value VARCHAR(300),
|
||||
pageevent_domcontentloadedeventend BIGINT,
|
||||
pageevent_domcontentloadedeventstart BIGINT,
|
||||
pageevent_firstcontentfulpaint BIGINT,
|
||||
pageevent_firstpaint BIGINT,
|
||||
pageevent_loaded BOOLEAN,
|
||||
pageevent_loadeventend BIGINT,
|
||||
pageevent_loadeventstart BIGINT,
|
||||
pageevent_messageid BIGINT,
|
||||
pageevent_referrer VARCHAR(300),
|
||||
pageevent_requeststart BIGINT,
|
||||
pageevent_responseend BIGINT,
|
||||
pageevent_responsestart BIGINT,
|
||||
pageevent_speedindex BIGINT,
|
||||
pageevent_timestamp BIGINT,
|
||||
pageevent_url VARCHAR(300),
|
||||
pageloadtiming_domcontentloadedeventend BIGINT,
|
||||
pageloadtiming_domcontentloadedeventstart BIGINT,
|
||||
pageloadtiming_firstcontentfulpaint BIGINT,
|
||||
pageloadtiming_firstpaint BIGINT,
|
||||
pageloadtiming_loadeventend BIGINT,
|
||||
pageloadtiming_loadeventstart BIGINT,
|
||||
pageloadtiming_requeststart BIGINT,
|
||||
pageloadtiming_responseend BIGINT,
|
||||
pageloadtiming_responsestart BIGINT,
|
||||
pagerendertiming_speedindex BIGINT,
|
||||
pagerendertiming_timetointeractive BIGINT,
|
||||
pagerendertiming_visuallycomplete BIGINT,
|
||||
performancetrack_frames BIGINT,
|
||||
performancetrack_ticks BIGINT,
|
||||
performancetrack_totaljsheapsize BIGINT,
|
||||
performancetrack_usedjsheapsize BIGINT,
|
||||
performancetrackaggr_avgcpu BIGINT,
|
||||
performancetrackaggr_avgfps BIGINT,
|
||||
performancetrackaggr_avgtotaljsheapsize BIGINT,
|
||||
performancetrackaggr_avgusedjsheapsize BIGINT,
|
||||
performancetrackaggr_maxcpu BIGINT,
|
||||
performancetrackaggr_maxfps BIGINT,
|
||||
performancetrackaggr_maxtotaljsheapsize BIGINT,
|
||||
performancetrackaggr_maxusedjsheapsize BIGINT,
|
||||
performancetrackaggr_mincpu BIGINT,
|
||||
performancetrackaggr_minfps BIGINT,
|
||||
performancetrackaggr_mintotaljsheapsize BIGINT,
|
||||
performancetrackaggr_minusedjsheapsize BIGINT,
|
||||
performancetrackaggr_timestampend BIGINT,
|
||||
performancetrackaggr_timestampstart BIGINT,
|
||||
profiler_args VARCHAR(300),
|
||||
profiler_duration BIGINT,
|
||||
profiler_name VARCHAR(300),
|
||||
profiler_result VARCHAR(300),
|
||||
rawcustomevent_name VARCHAR(300),
|
||||
rawcustomevent_payload VARCHAR(300),
|
||||
rawerrorevent_message VARCHAR(300),
|
||||
rawerrorevent_name VARCHAR(300),
|
||||
rawerrorevent_payload VARCHAR(300),
|
||||
rawerrorevent_source VARCHAR(300),
|
||||
rawerrorevent_timestamp BIGINT,
|
||||
redux_action VARCHAR(300),
|
||||
redux_duration BIGINT,
|
||||
redux_state VARCHAR(300),
|
||||
removenode_id BIGINT,
|
||||
removenodeattribute_id BIGINT,
|
||||
removenodeattribute_name VARCHAR(300),
|
||||
resourceevent_decodedbodysize BIGINT,
|
||||
resourceevent_duration BIGINT,
|
||||
resourceevent_encodedbodysize BIGINT,
|
||||
resourceevent_headersize BIGINT,
|
||||
resourceevent_messageid BIGINT,
|
||||
resourceevent_method VARCHAR(300),
|
||||
resourceevent_status BIGINT,
|
||||
resourceevent_success BOOLEAN,
|
||||
resourceevent_timestamp BIGINT,
|
||||
resourceevent_ttfb BIGINT,
|
||||
resourceevent_type VARCHAR(300),
|
||||
resourceevent_url VARCHAR(300),
|
||||
resourcetiming_decodedbodysize BIGINT,
|
||||
resourcetiming_duration BIGINT,
|
||||
resourcetiming_encodedbodysize BIGINT,
|
||||
resourcetiming_headersize BIGINT,
|
||||
resourcetiming_initiator VARCHAR(300),
|
||||
resourcetiming_timestamp BIGINT,
|
||||
resourcetiming_ttfb BIGINT,
|
||||
resourcetiming_url VARCHAR(300),
|
||||
sessiondisconnect BOOLEAN,
|
||||
sessiondisconnect_timestamp BIGINT,
|
||||
sessionend BOOLEAN,
|
||||
sessionend_timestamp BIGINT,
|
||||
sessionstart_projectid BIGINT,
|
||||
sessionstart_revid VARCHAR(300),
|
||||
sessionstart_timestamp BIGINT,
|
||||
sessionstart_trackerversion VARCHAR(300),
|
||||
sessionstart_useragent VARCHAR(300),
|
||||
sessionstart_userbrowser VARCHAR(300),
|
||||
sessionstart_userbrowserversion VARCHAR(300),
|
||||
sessionstart_usercountry VARCHAR(300),
|
||||
sessionstart_userdevice VARCHAR(300),
|
||||
sessionstart_userdeviceheapsize BIGINT,
|
||||
sessionstart_userdevicememorysize BIGINT,
|
||||
sessionstart_userdevicetype VARCHAR(300),
|
sessionstart_useros VARCHAR(300),
sessionstart_userosversion VARCHAR(300),
sessionstart_useruuid VARCHAR(300),
setcssdata_data BIGINT,
setcssdata_id BIGINT,
setinputchecked_checked BIGINT,
setinputchecked_id BIGINT,
setinputtarget_id BIGINT,
setinputtarget_label BIGINT,
setinputvalue_id BIGINT,
setinputvalue_mask BIGINT,
setinputvalue_value BIGINT,
setnodeattribute_id BIGINT,
setnodeattribute_name BIGINT,
setnodeattribute_value BIGINT,
setnodedata_data BIGINT,
setnodedata_id BIGINT,
setnodescroll_id BIGINT,
setnodescroll_x BIGINT,
setnodescroll_y BIGINT,
setpagelocation_navigationstart BIGINT,
setpagelocation_referrer VARCHAR(300),
setpagelocation_url VARCHAR(300),
setpagevisibility_hidden BOOLEAN,
setviewportscroll_x BIGINT,
setviewportscroll_y BIGINT,
setviewportsize_height BIGINT,
setviewportsize_width BIGINT,
stateaction_type VARCHAR(300),
stateactionevent_messageid BIGINT,
stateactionevent_timestamp BIGINT,
stateactionevent_type VARCHAR(300),
timestamp_timestamp BIGINT,
useranonymousid_id VARCHAR(300),
userid_id VARCHAR(300),
vuex_mutation VARCHAR(300),
vuex_state VARCHAR(300),
longtasks_timestamp BIGINT,
longtasks_duration BIGINT,
longtasks_context BIGINT,
longtasks_containertype BIGINT,
longtasks_containersrc VARCHAR(300),
longtasks_containerid VARCHAR(300),
longtasks_containername BIGINT,
setnodeurlbasedattribute_id BIGINT,
setnodeurlbasedattribute_name VARCHAR(300),
setnodeurlbasedattribute_value VARCHAR(300),
setnodeurlbasedattribute_baseurl VARCHAR(300),
setstyledata_id BIGINT,
setstyledata_data VARCHAR(300),
setstyledata_baseurl VARCHAR(300),
issueevent_messageid BIGINT,
issueevent_timestamp BIGINT,
issueevent_type VARCHAR(300),
issueevent_contextstring VARCHAR(300),
issueevent_context VARCHAR(300),
issueevent_payload VARCHAR(300),
technicalinfo_type VARCHAR(300),
technicalinfo_value VARCHAR(300),
customissue_name VARCHAR(300),
customissue_payload VARCHAR(300),
pageclose BIGINT,
received_at BIGINT,
batch_order_number BIGINT

sessionid BIGINT,
clickevent_hesitationtime BIGINT,
clickevent_label VARCHAR(5000),
clickevent_messageid BIGINT,
clickevent_timestamp BIGINT,
connectioninformation_downlink BIGINT,
connectioninformation_type VARCHAR(5000),
consolelog_level VARCHAR(5000),
consolelog_value VARCHAR(5000),
customevent_name VARCHAR(5000),
customevent_payload VARCHAR(5000),
fetch_duration BIGINT,
fetch_method VARCHAR(5000),
fetch_request VARCHAR(5000),
fetch_response VARCHAR(5000),
fetch_status BIGINT,
fetch_timestamp BIGINT,
fetch_url VARCHAR(5000),
graphql_operationkind VARCHAR(5000),
graphql_operationname VARCHAR(5000),
graphql_response VARCHAR(5000),
graphql_variables VARCHAR(5000),
inputevent_label VARCHAR(5000),
inputevent_messageid BIGINT,
inputevent_timestamp BIGINT,
inputevent_value VARCHAR(5000),
inputevent_valuemasked BOOLEAN,
jsexception_message VARCHAR(5000),
jsexception_name VARCHAR(5000),
jsexception_payload VARCHAR(5000),
jsexception_metadata VARCHAR(5000),
mouseclick_id BIGINT,
mouseclick_hesitationtime BIGINT,
mouseclick_label VARCHAR(5000),
networkrequest_type VARCHAR(5000),
networkrequest_method VARCHAR(5000),
networkrequest_url VARCHAR(5000),
networkrequest_request VARCHAR(5000),
networkrequest_response VARCHAR(5000),
networkrequest_status BIGINT,
networkrequest_timestamp BIGINT,
networkrequest_duration BIGINT,
pageevent_domcontentloadedeventend BIGINT,
pageevent_domcontentloadedeventstart BIGINT,
pageevent_firstcontentfulpaint BIGINT,
pageevent_firstpaint BIGINT,
pageevent_loaded BOOLEAN,
pageevent_loadeventend BIGINT,
pageevent_loadeventstart BIGINT,
pageevent_messageid BIGINT,
pageevent_referrer VARCHAR(5000),
pageevent_requeststart BIGINT,
pageevent_responseend BIGINT,
pageevent_responsestart BIGINT,
pageevent_speedindex BIGINT,
pageevent_timestamp BIGINT,
pageevent_url VARCHAR(5000),
sessionend_timestamp BIGINT,
sessionend_encryption_key VARCHAR(5000),
sessionstart_projectid BIGINT,
sessionstart_revid VARCHAR(5000),
sessionstart_timestamp BIGINT,
sessionstart_trackerversion VARCHAR(5000),
sessionstart_useragent VARCHAR(5000),
sessionstart_userbrowser VARCHAR(5000),
sessionstart_userbrowserversion VARCHAR(5000),
sessionstart_usercountry VARCHAR(5000),
sessionstart_userdevice VARCHAR(5000),
sessionstart_userdeviceheapsize BIGINT,
sessionstart_userdevicememorysize BIGINT,
sessionstart_userdevicetype VARCHAR(5000),
sessionstart_useros VARCHAR(5000),
sessionstart_userosversion VARCHAR(5000),
sessionstart_useruuid VARCHAR(5000),
setpagelocation_navigationstart BIGINT,
setpagelocation_referrer VARCHAR(5000),
setpagelocation_url VARCHAR(5000),
issueevent_message_id BIGINT,
issueevent_timestamp BIGINT,
issueevent_type VARCHAR(5000),
issueevent_context_string VARCHAR(5000),
issueevent_context VARCHAR(5000),
issueevent_payload VARCHAR(5000),
issueevent_url VARCHAR(5000),
customissue_name VARCHAR(5000),
customissue_payload VARCHAR(5000),
received_at BIGINT,
batch_order_number BIGINT
);
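For orientation, here is a minimal, hypothetical sketch of how batches of flattened event dictionaries could be written to a warehouse table with this column shape. The table name, the column subset, and the executemany-based writer are all assumptions for illustration, not the connector's actual write path:

import psycopg2  # assumption: a psycopg2-compatible driver for the warehouse

# Hypothetical subset of the columns defined above.
COLUMNS = ["sessionid", "clickevent_label", "clickevent_timestamp",
           "received_at", "batch_order_number"]

def write_batch(conn, rows):
    """Insert a batch of event dicts; missing keys become NULL."""
    placeholders = ", ".join(["%s"] * len(COLUMNS))
    sql = (f"INSERT INTO connector_events ({', '.join(COLUMNS)}) "
           f"VALUES ({placeholders})")
    values = [tuple(row.get(c) for c in COLUMNS) for row in rows]
    with conn.cursor() as cur:
        cur.executemany(sql, values)
    conn.commit()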
@ -38,13 +38,8 @@ CREATE TABLE IF NOT EXISTS connector_user_sessions
max_used_js_heap_size bigint,
-- ISSUES AND EVENTS
js_exceptions_count bigint,
long_tasks_total_duration bigint,
long_tasks_max_duration bigint,
long_tasks_count bigint,
inputs_count bigint,
clicks_count bigint,
issues_count bigint,
issues VARCHAR,
urls_count bigint,
urls VARCHAR
urls_count bigint
);
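Because `issues` is a plain VARCHAR column rather than a list type, per-session collections have to be serialized into a single string before being written. A hedged sketch of what that flattening could look like (the separator and truncation length are assumptions):

def flatten_for_varchar(items, max_len=5000, sep=","):
    """Join a per-session list into one VARCHAR-friendly string,
    truncated to fit the (assumed) column width."""
    joined = sep.join(str(i) for i in items if i is not None)
    return joined[:max_len]

# e.g. flatten_for_varchar(["dead_click", "js_exception"]) -> "dead_click,js_exception"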
103
ee/connectors/utils/cache.py
Normal file
@ -0,0 +1,103 @@
from utils.pg_client import PostgresClient
from decouple import config
from time import time


def _project_from_session(sessionId):
    """Search for the projectId of the requested sessionId in the PG sessions table"""
    with PostgresClient() as conn:
        conn.execute(
            conn.mogrify("SELECT project_id FROM sessions WHERE session_id=%(sessionId)s LIMIT 1",
                         {'sessionId': sessionId})
        )
        res = conn.fetchone()
    if res is None:
        print(f'[WARN] sessionid {sessionId} not found in sessions table')
        return None
    return res['project_id']


class CachedSessions:

    def __init__(self):
        """Cache of open and recently closed sessions, with their current status.
        env:
            MAX_SESSION_LIFE: cache lifespan of a session (default 7200 seconds)"""
        self.session_project = dict()
        self.max_alive_time = config('MAX_SESSION_LIFE', default=7200, cast=int)  # Default 2 hours

    def create(self, sessionid):
        """Save a new session with status OPEN and set its insertion time"""
        self.session_project[sessionid] = (time(), 'OPEN')

    def add(self, sessionid):
        """Create a cached session, or update its status if it is already in the cache"""
        if sessionid in self.session_project.keys():
            if self.session_project[sessionid][1] == 'CLOSE':
                tmp = self.session_project[sessionid]
                self.session_project[sessionid] = (tmp[0], 'UPDATE')
        else:
            self.create(sessionid)

    def close(self, sessionid):
        """Set the session status to CLOSE (a sessionend message was received)"""
        tmp = self.session_project[sessionid]
        old_status = tmp[1]
        self.session_project[sessionid] = (tmp[0], 'CLOSE')
        return old_status

    def clear_sessions(self):
        """Delete all sessions that reached max_alive_time"""
        to_clean_list = list()
        current_time = time()
        for sessionid, values in self.session_project.items():
            if current_time - values[0] > self.max_alive_time:
                to_clean_list.append(sessionid)
        # Delete after iterating: removing keys while iterating over the dict raises RuntimeError.
        for sessionid in to_clean_list:
            del self.session_project[sessionid]
        return to_clean_list


class ProjectFilter:

    def __init__(self, filter=None):
        """Filters out all sessions that do not come from the selected projects. This class reads
        from PG to find the projectId and caches the result to avoid duplicated requests.
        env:
            max_cache_size: max allowed cache length - cleanup starts when oversized
            cache_lifespan: max lifetime of a cached entry - deleted during the cleanup phase once surpassed"""
        self.filter = filter if filter is not None else list()  # avoid a shared mutable default argument
        self.cache = dict()
        self.cached_sessions = CachedSessions()
        self.to_clean = list()
        self.count_bad = 0
        self.max_cache_size = config('max_cache_size', default=50, cast=int)
        self.cache_lifespan = config('cache_lifespan', default=900, cast=int)

    def is_valid(self, sessionId):
        """Verify whether sessionId belongs to a selected project"""
        if len(self.filter) == 0:
            return True
        elif sessionId in self.cache.keys():
            return self.cache[sessionId][1]
        else:
            found_project_id = _project_from_session(sessionId)
            if found_project_id is None:
                self.count_bad += 1
                return False
            else:
                project_is_valid = found_project_id in self.filter
                self.cache[sessionId] = [time(), project_is_valid]
                return project_is_valid

    def cleanup(self):
        """Delete cache entries that reached the cache_lifespan value"""
        current_time = time()
        self.cache = {sessionid: values for sessionid, values in self.cache.items()
                      if current_time - values[0] < self.cache_lifespan}

    def handle_clean(self):
        """Verify and execute cleanup if needed"""
        if len(self.filter) == 0:
            return
        elif len(self.cache) > self.max_cache_size:
            self.cleanup()
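A short usage sketch of how a consumer loop might combine ProjectFilter and CachedSessions. The message shape, the on_message helper, and the PROJECT_FILTER env variable are assumptions for illustration:

from decouple import config
from utils.cache import ProjectFilter

# Assumed env var holding a comma-separated list of project ids, e.g. "1,7,42".
selected = [int(p) for p in config('PROJECT_FILTER', default='').split(',') if p]
project_filter = ProjectFilter(filter=selected)

def on_message(session_id, is_session_end):
    if not project_filter.is_valid(session_id):
        return  # session belongs to a non-tracked project; skip it
    project_filter.cached_sessions.add(session_id)
    if is_session_end:
        project_filter.cached_sessions.close(session_id)
    project_filter.handle_clean()  # trim the projectId cache when oversized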
182
ee/connectors/utils/pg_client.py
Normal file
@ -0,0 +1,182 @@
import logging
import time
from threading import Semaphore

import psycopg2
import psycopg2.extras
from decouple import config
from psycopg2 import pool

logging.basicConfig(level=config("LOGLEVEL", default=logging.INFO))
logging.getLogger('apscheduler').setLevel(config("LOGLEVEL", default=logging.INFO))

_PG_CONFIG = {"host": config("pg_host"),
              "database": config("pg_dbname"),
              "user": config("pg_user"),
              "password": config("pg_password"),
              "port": config("pg_port", cast=int),
              "application_name": config("APP_NAME", default="PY")}
PG_CONFIG = dict(_PG_CONFIG)
if config("PG_TIMEOUT", cast=int, default=0) > 0:
    PG_CONFIG["options"] = f"-c statement_timeout={config('PG_TIMEOUT', cast=int) * 1000}"


class ORThreadedConnectionPool(psycopg2.pool.ThreadedConnectionPool):
    def __init__(self, minconn, maxconn, *args, **kwargs):
        self._semaphore = Semaphore(maxconn)
        super().__init__(minconn, maxconn, *args, **kwargs)

    def getconn(self, *args, **kwargs):
        self._semaphore.acquire()
        try:
            return super().getconn(*args, **kwargs)
        except psycopg2.pool.PoolError as e:
            if str(e) == "connection pool is closed":
                make_pool()
            raise e

    def putconn(self, *args, **kwargs):
        try:
            super().putconn(*args, **kwargs)
            self._semaphore.release()
        except psycopg2.pool.PoolError as e:
            if str(e) == "trying to put unkeyed connection":
                print("!!! trying to put unkeyed connection")
                print(f"env-PG_POOL:{config('PG_POOL', default=None)}")
                return
            raise e


postgreSQL_pool: ORThreadedConnectionPool = None

RETRY_MAX = config("PG_RETRY_MAX", cast=int, default=50)
RETRY_INTERVAL = config("PG_RETRY_INTERVAL", cast=int, default=2)
RETRY = 0


def make_pool():
    if not config('PG_POOL', cast=bool, default=True):
        return
    global postgreSQL_pool
    global RETRY
    if postgreSQL_pool is not None:
        try:
            postgreSQL_pool.closeall()
        except (Exception, psycopg2.DatabaseError) as error:
            logging.error("Error while closing all connections to PostgreSQL: %s", error)
    try:
        postgreSQL_pool = ORThreadedConnectionPool(config("PG_MINCONN", cast=int, default=20),
                                                   config("PG_MAXCONN", cast=int, default=80),
                                                   **PG_CONFIG)
        if postgreSQL_pool:
            logging.info("Connection pool created successfully")
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error("Error while connecting to PostgreSQL: %s", error)
        if RETRY < RETRY_MAX:
            RETRY += 1
            logging.info(f"waiting for {RETRY_INTERVAL}s before retry n°{RETRY}")
            time.sleep(RETRY_INTERVAL)
            make_pool()
        else:
            raise error


class PostgresClient:
    connection = None
    cursor = None
    long_query = False
    unlimited_query = False

    def __init__(self, long_query=False, unlimited_query=False, use_pool=True):
        self.long_query = long_query
        self.unlimited_query = unlimited_query
        self.use_pool = use_pool
        if unlimited_query:
            long_config = dict(_PG_CONFIG)
            long_config["application_name"] += "-UNLIMITED"
            self.connection = psycopg2.connect(**long_config)
        elif long_query:
            long_config = dict(_PG_CONFIG)
            long_config["application_name"] += "-LONG"
            long_config["options"] = f"-c statement_timeout=" \
                                     f"{config('pg_long_timeout', cast=int, default=5 * 60) * 1000}"
            self.connection = psycopg2.connect(**long_config)
        elif not use_pool or not config('PG_POOL', cast=bool, default=True):
            single_config = dict(_PG_CONFIG)
            single_config["application_name"] += "-NOPOOL"
            single_config["options"] = f"-c statement_timeout={config('PG_TIMEOUT', cast=int, default=30) * 1000}"
            self.connection = psycopg2.connect(**single_config)
        else:
            self.connection = postgreSQL_pool.getconn()

    def __enter__(self):
        if self.cursor is None:
            self.cursor = self.connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
            self.cursor.cursor_execute = self.cursor.execute
            self.cursor.execute = self.__execute
            self.cursor.recreate = self.recreate_cursor
        return self.cursor

    def __exit__(self, *args):
        try:
            self.connection.commit()
            self.cursor.close()
            if not self.use_pool or self.long_query or self.unlimited_query:
                self.connection.close()
        except Exception as error:
            logging.error("Error while committing/closing PG-connection: %s", error)
            if str(error) == "connection already closed" \
                    and self.use_pool \
                    and not self.long_query \
                    and not self.unlimited_query \
                    and config('PG_POOL', cast=bool, default=True):
                logging.info("Recreating the connection pool")
                make_pool()
            else:
                raise error
        finally:
            if config('PG_POOL', cast=bool, default=True) \
                    and self.use_pool \
                    and not self.long_query \
                    and not self.unlimited_query:
                postgreSQL_pool.putconn(self.connection)

    def __execute(self, query, vars=None):
        try:
            result = self.cursor.cursor_execute(query=query, vars=vars)
        except psycopg2.Error as error:
            logging.error(f"!!! Error of type:{type(error)} while executing query:")
            logging.error(query)
            logging.info("starting rollback to allow future execution")
            self.connection.rollback()
            raise error
        return result

    def recreate_cursor(self, rollback=False):
        if rollback:
            try:
                self.connection.rollback()
            except Exception as error:
                logging.error("Error while rolling back connection for recreation: %s", error)
        try:
            self.cursor.close()
        except Exception as error:
            logging.error("Error while closing cursor for recreation: %s", error)
        self.cursor = None
        return self.__enter__()


async def init():
    logging.info(f">PG_POOL:{config('PG_POOL', default=None)}")
    if config('PG_POOL', cast=bool, default=True):
        make_pool()


async def terminate():
    global postgreSQL_pool
    if postgreSQL_pool is not None:
        try:
            postgreSQL_pool.closeall()
            logging.info("Closed all connections to PostgreSQL")
        except (Exception, psycopg2.DatabaseError) as error:
            logging.error("Error while closing all connections to PostgreSQL: %s", error)
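For context, a minimal usage sketch of this client: the pool is created once via init(), and each query runs inside the context manager, which hands back a RealDictCursor and commits on exit. The query itself is illustrative:

import asyncio
from utils.pg_client import PostgresClient, init, terminate

async def main():
    await init()  # builds the connection pool when PG_POOL is enabled
    with PostgresClient() as cur:
        cur.execute("SELECT project_id FROM sessions LIMIT 1")  # example query
        row = cur.fetchone()  # RealDictCursor -> dict-like row, or None
        print(row)
    await terminate()

asyncio.run(main())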