From d578f5e097ce3c4628955e1f755cf2445b2de396 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Tue, 12 Jul 2022 12:16:08 +0200 Subject: [PATCH 01/33] Updated messages methods. New values: - [x] IOSIssueEvent (111) - [x] IOSBatchMeta (107) - [x] IOSNetworkCall (105) - [x] IOSPerformanceEvent (102) - [x] IOSInputEvent (101) - [x] IOSClickEvent (100) - [x] IOSScreenEnter (98) - [x] IOSCrash (97) - [x] IOSScreenChanges (96) - [x] IOSCustomEvent (93) - [x] CreateIFrameDocument (70) - [x] CSSInsertRuleURLBased (67) - [x] AssetCache (66) - [x] FetchEvent (51) - [x] MouseClick (69) <- MouseClickDepricated (21) --- ee/connectors/msgcodec/codec.py | 130 ++++++++++++++++++++++- ee/connectors/msgcodec/messages.py | 161 ++++++++++++++++++++++++++++- 2 files changed, 289 insertions(+), 2 deletions(-) diff --git a/ee/connectors/msgcodec/codec.py b/ee/connectors/msgcodec/codec.py index 18f074a33..da06b5262 100644 --- a/ee/connectors/msgcodec/codec.py +++ b/ee/connectors/msgcodec/codec.py @@ -215,7 +215,7 @@ class MessageCodec(Codec): ) if message_id == 21: - return MouseClick( + return MouseClickDepricated( id=self.read_uint(reader), hesitation_time=self.read_uint(reader), label=self.read_string(reader) @@ -461,6 +461,18 @@ class MessageCodec(Codec): name=self.read_string(reader) ) + if message_id == 51: + return FetchEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + method=self.read_string(reader), + url=self.read_string(reader), + request=self.read_string(reader), + response=self.read_string(reader), + status=self.read_uint(reader), + duration=self.read_uint(reader) + ) + if message_id == 52: return DomDrop( timestamp=self.read_uint(reader) @@ -558,6 +570,33 @@ class MessageCodec(Codec): if message_id == 65: return PageClose() + if mesage_id == 66: + return AssetCache( + url=self.read_string(reader) + ) + + if message_id == 67: + return CSSInsertRuleURLBased( + id=self.read_uint(reader), + rule=self.read_string(reader), + index=self.read_uint(reader), + based_url=self.read_string(reader) + ) + + if message_id == 69: + return MouseClick( + id=self.read_uint(reader), + hesitation_time=self.read_uint(reader), + label=self.read_string(reader), + selector=self.read_string(reader) + ) + + if message_id == 70: + return CreateIFrameDocument( + frame_id=self.read_uint(reader), + id=self.read_uint(reader) + ) + if message_id == 90: return IOSSessionStart( timestamp=self.read_uint(reader), @@ -585,6 +624,14 @@ class MessageCodec(Codec): value=self.read_string(reader) ) + if message_id == 93: + return IOSCustomEvent( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + name=self.read_string(reader), + payload=self.read_string(reader) + ) + if message_id == 94: return IOSUserID( timestamp=self.read_uint(reader), @@ -599,6 +646,33 @@ class MessageCodec(Codec): value=self.read_string(reader) ) + if message_id == 96: + return IOSScreenChanges( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + x=self.read_uint(reader), + y=self.read_uint(reader), + width=self.read_uint(reader), + height=self.read_uint(reader) + ) + + if message_id == 97: + return IOSCrash( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + name=self.read_string(reader), + reason=self.read_string(reader), + stacktrace=self.read_string(reader) + ) + + if message_id == 98: + return IOSSCreenEnter( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + title=self.read_string(reader), + view_name=self.read_string(reader) + ) + if message_id == 99: return IOSScreenLeave( timestamp=self.read_uint(reader), @@ -607,6 +681,32 @@ class MessageCodec(Codec): view_name=self.read_string(reader) ) + if message_id == 100: + return IOSClickEvent( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + label=self.read_string(reader), + x=self.read_uint(reader), + y=self.read_uint(reader) + ) + + if message_if == 101: + return IOSInputEvent( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + value=self.read_string(reader), + value_masked=self.read_boolean(reader), + label=self.read_string(reader) + ) + + if message_id == 102: + return IOSPreformanceEvent( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + name=self.read_string(reader), + value=self.read_uint(reader) + ) + if message_id == 103: return IOSLog( timestamp=self.read_uint(reader), @@ -622,6 +722,26 @@ class MessageCodec(Codec): content=self.read_string(reader) ) + if message_id == 105: + return IOSNetworkCall( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + duration=self.read_uint(reader), + headers=self.read_string(reader), + body=self.read_string(reader), + url=self.read_string(reader), + success=self.read_boolean(reader), + method=self.read_string(reader), + status=self.read_uint(reader) + ) + + if message_id == 107: + return IOSBatchMeta( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + first_index=self.read_uint(reader) + ) + if message_id == 110: return IOSPerformanceAggregated( timestamp_start=self.read_uint(reader), @@ -639,6 +759,14 @@ class MessageCodec(Codec): avg_battery=self.read_uint(reader), max_battery=self.read_uint(reader) ) + if message_id == 111: + return IOSIssueEvent( + timestamp=self.read_uint(reader), + type=self.read_string(reader), + context_string=self.read_string(reader), + context=self.read_string(reader), + payload=self.read_string(reader) + ) def read_message_id(self, reader: io.BytesIO) -> int: """ diff --git a/ee/connectors/msgcodec/messages.py b/ee/connectors/msgcodec/messages.py index c6e53b445..c026202fb 100644 --- a/ee/connectors/msgcodec/messages.py +++ b/ee/connectors/msgcodec/messages.py @@ -194,7 +194,7 @@ class MouseMove(Message): self.y = y -class MouseClick(Message): +class MouseClickDepricated(Message): __id__ = 21 def __init__(self, id, hesitation_time, label: str): @@ -518,6 +518,21 @@ class GraphQLEvent(Message): self.name = name +class FetchEvent(Message): + __id__ = 51 + + def __init__(self, message_id, timestamp, method: str, url, request, response: str, + status, duration): + self.message_id = message_id + self.timestamp = timestamp + self.method = method + self.url = url + self.request = request + self.response = response + self.status = status + self.duration = duration + + class DomDrop(Message): __id__ = 52 @@ -646,6 +661,41 @@ class PageClose(Message): __id__ = 65 +class AssetCache(Message): + __id__ = 66 + + def __init__(self, url): + self.url = url + + +class CSSInsertRuleURLBased(Message): + __id__ = 67 + + def __init__(self, id, rule, index, base_url): + self.id = id + self.rule = rule + self.index = index + self.base_url = base_url + + +class MouseClick(Message): + __id__ = 69 + + def __init__(self, id, hesitation_time, label: str, selector): + self.id = id + self.hesitation_time = hesitation_time + self.label = label + self.selector = selector + + +class CreateIFrameDocument(Message): + __id__ = 70 + + def __init__(self, frame_id, id): + self.frame_id = frame_id + self.id = id + + class IOSSessionStart(Message): __id__ = 90 @@ -681,6 +731,16 @@ class IOSMetadata(Message): self.value = value +class IOSCustomEvent(Message): + __id__ = 93 + + def __init__(self, timestamp, length, name: str, payload: str): + self.timestamp = timestamp + self.length = length + self.name = name + self.payload = payload + + class IOSUserID(Message): __id__ = 94 @@ -699,6 +759,39 @@ class IOSUserAnonymousID(Message): self.value = value +class IOSScreenChanges(Message): + __id__ = 96 + + def __init__(self, timestamp, length, x, y, width, height): + self.timestamp = timestamp + self.length = length + self.x = x + self.y = y + self.width = width + self.height = height + + +class IOSCrash(Message): + __id__ = 97 + + def __init__(self, timestamp, length, name: str, reason: str, stacktrace): + self.timestamp = timestamp + self.length = length + self.name = name + self.reason = reason + self.stacktrace = stacktrace + + +class IOSScreenEnter(Message): + __id__ = 98 + + def __init__(self, timestamp, length, title, view_name): + self.timestamp = timestamp + self.length = length + self.title = title + self.view_name = view_name + + class IOSScreenLeave(Message): __id__ = 99 @@ -709,6 +802,37 @@ class IOSScreenLeave(Message): self.view_name = view_name +class IOSClickEvent(Message): + __id__ = 100 + + def __init__(self, timestamp, length, label, x, y): + self.timestamp = timestamp + self.length = length + self.label = label + self.x = x + self.y = y + + +class IOSInputEvent(Message): + __id__ = 101 + + def __init__(self, timestamp, length, value: str, value_masked: bool, label: str): + self.timestamp = timestamp + self.length = length + self.value_masked = value_masked + self.label = label + + +class IOSPerformanceEvent(Message): + __id__ = 102 + + def __init__(self, timestamp, length, name: str, value): + self.timestamp = timestamp + self.length = length + self.name = name + self.value = value + + class IOSLog(Message): __id__ = 103 @@ -728,6 +852,30 @@ class IOSInternalError(Message): self.content = content +class IOSNetworkCall(Message): + __id__ = 105 + + def __init__(self, timestamp, length, duration, headers, body, url, success: bool, method: str, status): + self.timestamp = timestamp + self.length = length + self.duration = duration + self.headers = headers + self.body = body + self.url = url + self.success = success + self.method = method + self.status = status + + +class IOSBatchMeta(Message): + __id__ = 107 + + def __init__(self, timestamp, length, first_index): + self.timestamp = timestamp + self.length = length + self.first_index = first_index + + class IOSPerformanceAggregated(Message): __id__ = 110 @@ -750,3 +898,14 @@ class IOSPerformanceAggregated(Message): self.min_battery = min_battery self.avg_battery = avg_battery self.max_battery = max_battery + + +class IOSIssueEvent(Message): + __id__ = 111 + + def __init__(self, timestamp, type: str, context_string: str, context: str, payload: str): + self.timestamp = timestamp + self.type = type + self.context_string = context_string + self.context = context + self.payload = payload From c271a0b64e2e212a9fdba58b5e8d5f44dce60aed Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 13 Jul 2022 12:39:55 +0200 Subject: [PATCH 02/33] Solved some properties issues --- ee/connectors/msgcodec/codec.py | 31 +++++++++++++++++++++++++----- ee/connectors/msgcodec/messages.py | 8 ++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/ee/connectors/msgcodec/codec.py b/ee/connectors/msgcodec/codec.py index da06b5262..e26653478 100644 --- a/ee/connectors/msgcodec/codec.py +++ b/ee/connectors/msgcodec/codec.py @@ -1,5 +1,5 @@ import io - +from typing import List from msgcodec.messages import * @@ -24,6 +24,8 @@ class Codec: i = 0 # n of byte (max 9 for uint64) while True: b = reader.read(1) + if len(b) == 0: + raise IndexError('bytes out of range') num = int.from_bytes(b, "big", signed=False) # print(i, x) @@ -72,8 +74,20 @@ class MessageCodec(Codec): def decode(self, b: bytes) -> Message: reader = io.BytesIO(b) - message_id = self.read_message_id(reader) + return self.read_head_message(reader) + def decode_detailed(self, b: bytes) -> List[Message]: + reader = io.BytesIO(b) + messages_list = list() + while True: + try: + messages_list.append(self.read_head_message(reader)) + except IndexError: + break + return messages_list + + def read_head_message(self, reader: io.BytesIO) -> Message: + message_id = self.read_message_id(reader) if message_id == 0: return Timestamp( timestamp=self.read_uint(reader) @@ -570,7 +584,7 @@ class MessageCodec(Codec): if message_id == 65: return PageClose() - if mesage_id == 66: + if message_id == 66: return AssetCache( url=self.read_string(reader) ) @@ -580,7 +594,7 @@ class MessageCodec(Codec): id=self.read_uint(reader), rule=self.read_string(reader), index=self.read_uint(reader), - based_url=self.read_string(reader) + base_url=self.read_string(reader) ) if message_id == 69: @@ -597,6 +611,13 @@ class MessageCodec(Codec): id=self.read_uint(reader) ) + if message_id == 80: + return BatchMeta( + page_no=self.read_uint(reader), + first_index=self.read_uint(reader), + timestamp=self.read_int(reader) + ) + if message_id == 90: return IOSSessionStart( timestamp=self.read_uint(reader), @@ -690,7 +711,7 @@ class MessageCodec(Codec): y=self.read_uint(reader) ) - if message_if == 101: + if message_id == 101: return IOSInputEvent( timestamp=self.read_uint(reader), length=self.read_uint(reader), diff --git a/ee/connectors/msgcodec/messages.py b/ee/connectors/msgcodec/messages.py index c026202fb..bc451b287 100644 --- a/ee/connectors/msgcodec/messages.py +++ b/ee/connectors/msgcodec/messages.py @@ -696,6 +696,14 @@ class CreateIFrameDocument(Message): self.id = id +class BatchMeta(Message): + __id__ = 80 + + def __init__(self, page_no, first_index, timestamp): + self.page_no = page_no + self.first_index = first_index + self.timestamp = timestamp + class IOSSessionStart(Message): __id__ = 90 From 2e8f582bf77b54fe99abf4751b5e168246648da3 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 13 Jul 2022 12:44:51 +0200 Subject: [PATCH 03/33] consumer.py now working with new message types and BigQuery --- ee/connectors/consumer.py | 86 ++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/ee/connectors/consumer.py b/ee/connectors/consumer.py index dfa856501..f65633f7d 100644 --- a/ee/connectors/consumer.py +++ b/ee/connectors/consumer.py @@ -30,59 +30,61 @@ def main(): codec = MessageCodec() consumer = KafkaConsumer(security_protocol="SSL", - bootstrap_servers=[os.environ['KAFKA_SERVER_1'], - os.environ['KAFKA_SERVER_2']], - group_id=f"connector_{DATABASE}", + bootstrap_servers=os.environ['KAFKA_SERVER_2'], + os.environ['KAFKA_SERVER_1']], + group_id=f"my_test3_connector_{DATABASE}", auto_offset_reset="earliest", - enable_auto_commit=False) + enable_auto_commit=False + ) - consumer.subscribe(topics=["events", "messages"]) + consumer.subscribe(topics=["raw", "raw_ios"]) print("Kafka consumer subscribed") for msg in consumer: - message = codec.decode(msg.value) - if message is None: + messages = codec.decode_detailed(msg.value) + session_id = codec.decode_key(msg.key) + if messages is None: print('-') continue + for message in messages: + if LEVEL == 'detailed': + n = handle_message(message) + elif LEVEL == 'normal': + n = handle_normal_message(message) - if LEVEL == 'detailed': - n = handle_message(message) - elif LEVEL == 'normal': - n = handle_normal_message(message) - - session_id = codec.decode_key(msg.key) - sessions[session_id] = handle_session(sessions[session_id], message) - if sessions[session_id]: - sessions[session_id].sessionid = session_id - - # put in a batch for insertion if received a SessionEnd - if isinstance(message, SessionEnd): + #session_id = codec.decode_key(msg.key) + sessions[session_id] = handle_session(sessions[session_id], message) if sessions[session_id]: - sessions_batch.append(sessions[session_id]) + sessions[session_id].sessionid = session_id - # try to insert sessions - if len(sessions_batch) >= sessions_batch_size: - attempt_session_insert(sessions_batch) - for s in sessions_batch: - try: - del sessions[s.sessionid] - except KeyError as e: - print(repr(e)) - sessions_batch = [] + # put in a batch for insertion if received a SessionEnd + if isinstance(message, SessionEnd): + if sessions[session_id]: + sessions_batch.append(sessions[session_id]) - if n: - n.sessionid = session_id - n.received_at = int(datetime.now().timestamp() * 1000) - n.batch_order_number = len(batch) - batch.append(n) - else: - continue + # try to insert sessions + if len(sessions_batch) >= sessions_batch_size: + attempt_session_insert(sessions_batch) + for s in sessions_batch: + try: + del sessions[s.sessionid] + except KeyError as e: + print(repr(e)) + sessions_batch = [] - # insert a batch of events - if len(batch) >= batch_size: - attempt_batch_insert(batch) - batch = [] - consumer.commit() - print("sessions in cache:", len(sessions)) + if n: + n.sessionid = session_id + n.received_at = int(datetime.now().timestamp() * 1000) + n.batch_order_number = len(batch) + batch.append(n) + else: + continue + + # insert a batch of events + if len(batch) >= batch_size: + attempt_batch_insert(batch) + batch = [] + consumer.commit() + print("sessions in cache:", len(sessions)) def attempt_session_insert(sess_batch): From fb42eded2c059d47508e148af886685e16cc5afe Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 13 Jul 2022 13:36:08 +0200 Subject: [PATCH 04/33] fixed minor issue consumer.py --- ee/connectors/consumer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/connectors/consumer.py b/ee/connectors/consumer.py index f65633f7d..acf55f2d5 100644 --- a/ee/connectors/consumer.py +++ b/ee/connectors/consumer.py @@ -30,7 +30,7 @@ def main(): codec = MessageCodec() consumer = KafkaConsumer(security_protocol="SSL", - bootstrap_servers=os.environ['KAFKA_SERVER_2'], + bootstrap_servers=[os.environ['KAFKA_SERVER_2'], os.environ['KAFKA_SERVER_1']], group_id=f"my_test3_connector_{DATABASE}", auto_offset_reset="earliest", From eabc52b7a666fa5e85490b0f6f6bd75bc48e50e1 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Fri, 15 Jul 2022 17:06:11 +0200 Subject: [PATCH 05/33] Fixed and updated detailed sessions --- ee/connectors/bigquery_utils/create_table.py | 240 +++++++++++++++++++ 1 file changed, 240 insertions(+) diff --git a/ee/connectors/bigquery_utils/create_table.py b/ee/connectors/bigquery_utils/create_table.py index 4b166e4ae..9fd7ef1cc 100644 --- a/ee/connectors/bigquery_utils/create_table.py +++ b/ee/connectors/bigquery_utils/create_table.py @@ -128,6 +128,246 @@ def create_events_table(creds_file, table_id): bigquery.SchemaField("batch_order_number", "INT64")] create_table(creds_file, table_id, schema) +def create_events_detailed_table(creds_file, table_id): + #TODO: change types according to field + schema = [ + bigquery.SchemaField("sessionid", "INT64", mode="REQUIRED"), + bigquery.SchemaField("clickevent_hesitationtime", "INT64"), + bigquery.SchemaField("clickevent_label", "STRING"), + bigquery.SchemaField("clickevent_messageid", "INT64"), + bigquery.SchemaField("clickevent_timestamp", "INT64"), + bigquery.SchemaField("connectioninformation_downlink", "INT64"), + bigquery.SchemaField("connectioninformation_type", "STRING"), + bigquery.SchemaField("consolelog_level", "STRING"), + bigquery.SchemaField("consolelog_value", "STRING"), + bigquery.SchemaField("cpuissue_duration", "INT64"), + bigquery.SchemaField("cpuissue_rate", "INT64"), + bigquery.SchemaField("cpuissue_timestamp", "INT64"), + bigquery.SchemaField("createdocument", "BOOL"), + bigquery.SchemaField("createelementnode_id", "INT64"), + bigquery.SchemaField("createelementnode_parentid", "INT64"), + bigquery.SchemaField("cssdeleterule_index", "INT64"), + bigquery.SchemaField("cssdeleterule_stylesheetid", "INT64"), + bigquery.SchemaField("cssinsertrule_index", "INT64"), + bigquery.SchemaField("cssinsertrule_rule", "STRING"), + bigquery.SchemaField("cssinsertrule_stylesheetid", "INT64"), + bigquery.SchemaField("customevent_messageid", "INT64"), + bigquery.SchemaField("customevent_name", "STRING"), + bigquery.SchemaField("customevent_payload", "STRING"), + bigquery.SchemaField("customevent_timestamp", "INT64"), + bigquery.SchemaField("domdrop_timestamp", "INT64"), + bigquery.SchemaField("errorevent_message", "STRING"), + bigquery.SchemaField("errorevent_messageid", "INT64"), + bigquery.SchemaField("errorevent_name", "STRING"), + bigquery.SchemaField("errorevent_payload", "STRING"), + bigquery.SchemaField("errorevent_source", "STRING"), + bigquery.SchemaField("errorevent_timestamp", "INT64"), + bigquery.SchemaField("fetch_duration", "INT64"), + bigquery.SchemaField("fetch_method", "STRING"), + bigquery.SchemaField("fetch_request", "STRING"), + bigquery.SchemaField("fetch_response", "STRING"), + bigquery.SchemaField("fetch_status", "INT64"), + bigquery.SchemaField("fetch_timestamp", "INT64"), + bigquery.SchemaField("fetch_url", "STRING"), + bigquery.SchemaField("graphql_operationkind", "STRING"), + bigquery.SchemaField("graphql_operationname", "STRING"), + bigquery.SchemaField("graphql_response", "STRING"), + bigquery.SchemaField("graphql_variables", "STRING"), + bigquery.SchemaField("graphqlevent_messageid", "INT64"), + bigquery.SchemaField("graphqlevent_name", "STRING"), + bigquery.SchemaField("graphqlevent_timestamp", "INT64"), + bigquery.SchemaField("inputevent_label", "STRING"), + bigquery.SchemaField("inputevent_messageid", "INT64"), + bigquery.SchemaField("inputevent_timestamp", "INT64"), + bigquery.SchemaField("inputevent_value", "STRING"), + bigquery.SchemaField("inputevent_valuemasked", "BOOL"), + bigquery.SchemaField("jsexception_message", "STRING"), + bigquery.SchemaField("jsexception_name", "STRING"), + bigquery.SchemaField("jsexception_payload", "STRING"), + bigquery.SchemaField("memoryissue_duration", "INT64"), + bigquery.SchemaField("memoryissue_rate", "INT64"), + bigquery.SchemaField("memoryissue_timestamp", "INT64"), + bigquery.SchemaField("metadata_key", "STRING"), + bigquery.SchemaField("metadata_value", "STRING"), + bigquery.SchemaField("mobx_payload", "STRING"), + bigquery.SchemaField("mobx_type", "STRING"), + bigquery.SchemaField("mouseclick_id", "INT64"), + bigquery.SchemaField("mouseclick_hesitationtime", "INT64"), + bigquery.SchemaField("mouseclick_label", "STRING"), + bigquery.SchemaField("mousemove_x", "INT64"), + bigquery.SchemaField("mousemove_y", "INT64"), + bigquery.SchemaField("movenode_id", "INT64"), + bigquery.SchemaField("movenode_index", "INT64"), + bigquery.SchemaField("movenode_parentid", "INT64"), + bigquery.SchemaField("ngrx_action", "STRING"), + bigquery.SchemaField("ngrx_duration", "INT64"), + bigquery.SchemaField("ngrx_state", "STRING"), + bigquery.SchemaField("otable_key", "STRING"), + bigquery.SchemaField("otable_value", "STRING"), + bigquery.SchemaField("pageevent_domcontentloadedeventend", "INT64"), + bigquery.SchemaField("pageevent_domcontentloadedeventstart", "INT64"), + bigquery.SchemaField("pageevent_firstcontentfulpaint", "INT64"), + bigquery.SchemaField("pageevent_firstpaint", "INT64"), + bigquery.SchemaField("pageevent_loaded", "BOOL"), + bigquery.SchemaField("pageevent_loadeventend", "INT64"), + bigquery.SchemaField("pageevent_loadeventstart", "INT64"), + bigquery.SchemaField("pageevent_messageid", "INT64"), + bigquery.SchemaField("pageevent_referrer", "STRING"), + bigquery.SchemaField("pageevent_requeststart", "INT64"), + bigquery.SchemaField("pageevent_responseend", "INT64"), + bigquery.SchemaField("pageevent_responsestart", "INT64"), + bigquery.SchemaField("pageevent_speedindex", "INT64"), + bigquery.SchemaField("pageevent_timestamp", "INT64"), + bigquery.SchemaField("pageevent_url", "STRING"), + bigquery.SchemaField("pageloadtiming_domcontentloadedeventend", "INT64"), + bigquery.SchemaField("pageloadtiming_domcontentloadedeventstart", "INT64"), + bigquery.SchemaField("pageloadtiming_firstcontentfulpaint", "INT64"), + bigquery.SchemaField("pageloadtiming_firstpaint", "INT64"), + bigquery.SchemaField("pageloadtiming_loadeventend", "INT64"), + bigquery.SchemaField("pageloadtiming_loadeventstart", "INT64"), + bigquery.SchemaField("pageloadtiming_requeststart", "INT64"), + bigquery.SchemaField("pageloadtiming_responseend", "INT64"), + bigquery.SchemaField("pageloadtiming_responsestart", "INT64"), + bigquery.SchemaField("pagerendertiming_speedindex", "INT64"), + bigquery.SchemaField("pagerendertiming_timetointeractive", "INT64"), + bigquery.SchemaField("pagerendertiming_visuallycomplete", "INT64"), + bigquery.SchemaField("performancetrack_frames", "INT64"), + bigquery.SchemaField("performancetrack_ticks", "INT64"), + bigquery.SchemaField("performancetrack_totaljsheapsize", "INT64"), + bigquery.SchemaField("performancetrack_usedjsheapsize", "INT64"), + bigquery.SchemaField("performancetrackaggr_avgcpu", "INT64"), + bigquery.SchemaField("performancetrackaggr_avgfps", "INT64"), + bigquery.SchemaField("performancetrackaggr_avgtotaljsheapsize", "INT64"), + bigquery.SchemaField("performancetrackaggr_avgusedjsheapsize", "INT64"), + bigquery.SchemaField("performancetrackaggr_maxcpu", "INT64"), + bigquery.SchemaField("performancetrackaggr_maxfps", "INT64"), + bigquery.SchemaField("performancetrackaggr_maxtotaljsheapsize", "INT64"), + bigquery.SchemaField("performancetrackaggr_maxusedjsheapsize", "INT64"), + bigquery.SchemaField("performancetrackaggr_mincpu", "INT64"), + bigquery.SchemaField("performancetrackaggr_minfps", "INT64"), + bigquery.SchemaField("performancetrackaggr_mintotaljsheapsize", "INT64"), + bigquery.SchemaField("performancetrackaggr_minusedjsheapsize", "INT64"), + bigquery.SchemaField("performancetrackaggr_timestampend", "INT64"), + bigquery.SchemaField("performancetrackaggr_timestampstart", "INT64"), + bigquery.SchemaField("profiler_args", "STRING"), + bigquery.SchemaField("profiler_duration", "INT64"), + bigquery.SchemaField("profiler_name", "STRING"), + bigquery.SchemaField("profiler_result", "STRING"), + bigquery.SchemaField("rawcustomevent_name", "STRING"), + bigquery.SchemaField("rawcustomevent_payload", "STRING"), + bigquery.SchemaField("rawerrorevent_message", "STRING"), + bigquery.SchemaField("rawerrorevent_name", "STRING"), + bigquery.SchemaField("rawerrorevent_payload", "STRING"), + bigquery.SchemaField("rawerrorevent_source", "STRING"), + bigquery.SchemaField("rawerrorevent_timestamp", "INT64"), + bigquery.SchemaField("redux_action", "STRING"), + bigquery.SchemaField("redux_duration", "INT64"), + bigquery.SchemaField("redux_state", "STRING"), + bigquery.SchemaField("removenode_id", "INT64"), + bigquery.SchemaField("removenodeattribute_id", "INT64"), + bigquery.SchemaField("removenodeattribute_name", "STRING"), + bigquery.SchemaField("resourceevent_decodedbodysize", "INT64"), + bigquery.SchemaField("resourceevent_duration", "INT64"), + bigquery.SchemaField("resourceevent_encodedbodysize", "INT64"), + bigquery.SchemaField("resourceevent_headersize", "INT64"), + bigquery.SchemaField("resourceevent_messageid", "INT64"), + bigquery.SchemaField("resourceevent_method", "STRING"), + bigquery.SchemaField("resourceevent_status", "INT64"), + bigquery.SchemaField("resourceevent_success", "BOOL"), + bigquery.SchemaField("resourceevent_timestamp", "INT64"), + bigquery.SchemaField("resourceevent_ttfb", "INT64"), + bigquery.SchemaField("resourceevent_type", "STRING"), + bigquery.SchemaField("resourceevent_url", "STRING"), + bigquery.SchemaField("resourcetiming_decodedbodysize", "INT64"), + bigquery.SchemaField("resourcetiming_duration", "INT64"), + bigquery.SchemaField("resourcetiming_encodedbodysize", "INT64"), + bigquery.SchemaField("resourcetiming_headersize", "INT64"), + bigquery.SchemaField("resourcetiming_initiator", "STRING"), + bigquery.SchemaField("resourcetiming_timestamp", "INT64"), + bigquery.SchemaField("resourcetiming_ttfb", "INT64"), + bigquery.SchemaField("resourcetiming_url", "STRING"), + bigquery.SchemaField("sessiondisconnect", "BOOL"), + bigquery.SchemaField("sessiondisconnect_timestamp", "INT64"), + bigquery.SchemaField("sessionend", "BOOL"), + bigquery.SchemaField("sessionend_timestamp", "INT64"), + bigquery.SchemaField("sessionstart_projectid", "INT64"), + bigquery.SchemaField("sessionstart_revid", "STRING"), + bigquery.SchemaField("sessionstart_timestamp", "INT64"), + bigquery.SchemaField("sessionstart_trackerversion", "STRING"), + bigquery.SchemaField("sessionstart_useragent", "STRING"), + bigquery.SchemaField("sessionstart_userbrowser", "STRING"), + bigquery.SchemaField("sessionstart_userbrowserversion", "STRING"), + bigquery.SchemaField("sessionstart_usercountry", "STRING"), + bigquery.SchemaField("sessionstart_userdevice", "STRING"), + bigquery.SchemaField("sessionstart_userdeviceheapsize", "INT64"), + bigquery.SchemaField("sessionstart_userdevicememorysize", "INT64"), + bigquery.SchemaField("sessionstart_userdevicetype", "STRING"), + bigquery.SchemaField("sessionstart_useros", "STRING"), + bigquery.SchemaField("sessionstart_userosversion", "STRING"), + bigquery.SchemaField("sessionstart_useruuid", "STRING"), + bigquery.SchemaField("setcssdata_data", "INT64"), + bigquery.SchemaField("setcssdata_id", "INT64"), + bigquery.SchemaField("setinputchecked_checked", "INT64"), + bigquery.SchemaField("setinputchecked_id", "INT64"), + bigquery.SchemaField("setinputtarget_id", "INT64"), + bigquery.SchemaField("setinputtarget_label", "INT64"), + bigquery.SchemaField("setinputvalue_id", "INT64"), + bigquery.SchemaField("setinputvalue_mask", "INT64"), + bigquery.SchemaField("setinputvalue_value", "INT64"), + bigquery.SchemaField("setnodeattribute_id", "INT64"), + bigquery.SchemaField("setnodeattribute_name", "INT64"), + bigquery.SchemaField("setnodeattribute_value", "INT64"), + bigquery.SchemaField("setnodedata_data", "INT64"), + bigquery.SchemaField("setnodedata_id", "INT64"), + bigquery.SchemaField("setnodescroll_id", "INT64"), + bigquery.SchemaField("setnodescroll_x", "INT64"), + bigquery.SchemaField("setnodescroll_y", "INT64"), + bigquery.SchemaField("setpagelocation_navigationstart", "INT64"), + bigquery.SchemaField("setpagelocation_referrer", "STRING"), + bigquery.SchemaField("setpagelocation_url", "STRING"), + bigquery.SchemaField("setpagevisibility_hidden", "BOOL"), + bigquery.SchemaField("setviewportscroll_x", "INT64"), + bigquery.SchemaField("setviewportscroll_y", "INT64"), + bigquery.SchemaField("setviewportsize_height", "INT64"), + bigquery.SchemaField("setviewportsize_width", "INT64"), + bigquery.SchemaField("stateaction_type", "STRING"), + bigquery.SchemaField("stateactionevent_messageid", "INT64"), + bigquery.SchemaField("stateactionevent_timestamp", "INT64"), + bigquery.SchemaField("stateactionevent_type", "STRING"), + bigquery.SchemaField("timestamp_timestamp", "INT64"), + bigquery.SchemaField("useranonymousid_id", "STRING"), + bigquery.SchemaField("userid_id", "STRING"), + bigquery.SchemaField("vuex_mutation", "STRING"), + bigquery.SchemaField("vuex_state", "STRING"), + bigquery.SchemaField("longtasks_timestamp", "INT64"), + bigquery.SchemaField("longtasks_duration", "INT64"), + bigquery.SchemaField("longtasks_context", "FLOAT64"), + bigquery.SchemaField("longtasks_containertype", "FLOAT64"), + bigquery.SchemaField("longtasks_containersrc", "STRING"), + bigquery.SchemaField("longtasks_containerid", "STRING"), + bigquery.SchemaField("longtasks_containername", "FLOAT64"), + bigquery.SchemaField("setnodeurlbasedattribute_id", "FLOAT64"), + bigquery.SchemaField("setnodeurlbasedattribute_name", "STRING"), + bigquery.SchemaField("setnodeurlbasedattribute_value", "STRING"), + bigquery.SchemaField("setnodeurlbasedattribute_baseurl", "STRING"), + bigquery.SchemaField("setstyledata_id", "FLOAT64"), + bigquery.SchemaField("setstyledata_data", "STRING"), #CAUSING ISSUES + bigquery.SchemaField("setstyledata_baseurl", "STRING"), #CAUSING ISSUES + bigquery.SchemaField("issueevent_messageid", "FLOAT64"), + bigquery.SchemaField("issueevent_timestamp", "FLOAT64"), + bigquery.SchemaField("issueevent_type", "FLOAT64"), + bigquery.SchemaField("issueevent_contextstring", "FLOAT64"), + bigquery.SchemaField("issueevent_context", "FLOAT64"), + bigquery.SchemaField("issueevent_payload", "FLOAT64"), + bigquery.SchemaField("technicalinfo_type", "FLOAT64"), + bigquery.SchemaField("technicalinfo_value", "FLOAT64"), + bigquery.SchemaField("customissue_name", "FLOAT64"), + bigquery.SchemaField("customissue_payload", "FLOAT64"), + bigquery.SchemaField("pageclose", "FLOAT64"), + bigquery.SchemaField("received_at", "INT64", mode="REQUIRED"), + bigquery.SchemaField("batch_order_number", "INT64", mode="REQUIRED") + ] + create_table(creds_file, table_id, schema) def create_table_negatives(creds_file, table_id): client = bigquery.Client.from_service_account_json(creds_file) From b446147cb8d0d8c883cf55e0c14692fe288621cf Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Fri, 15 Jul 2022 17:12:22 +0200 Subject: [PATCH 06/33] solved some minor issues --- ee/connectors/db/models.py | 14 ++-- ee/connectors/db/utils.py | 1 + ee/connectors/handler.py | 141 +++++++++++++++++++++++++++++++- ee/connectors/msgcodec/codec.py | 2 +- 4 files changed, 149 insertions(+), 9 deletions(-) diff --git a/ee/connectors/db/models.py b/ee/connectors/db/models.py index 82af90ad8..74cd23230 100644 --- a/ee/connectors/db/models.py +++ b/ee/connectors/db/models.py @@ -346,13 +346,13 @@ class DetailedEvent(Base): userid_id = Column(VARCHAR(5000)) vuex_mutation = Column(VARCHAR(5000)) vuex_state = Column(VARCHAR(5000)) - longtask_timestamp = Column(BigInteger) - longtask_duration = Column(BigInteger) - longtask_context = Column(BigInteger) - longtask_containertype = Column(BigInteger) - longtask_containersrc = Column(VARCHAR(5000)) - longtask_containerid = Column(VARCHAR(5000)) - longtask_containername = Column(VARCHAR(5000)) + longtasks_timestamp = Column(BigInteger) + longtasks_duration = Column(BigInteger) + longtasks_context = Column(BigInteger) + longtasks_containertype = Column(BigInteger) + longtasks_containersrc = Column(VARCHAR(5000)) + longtasks_containerid = Column(VARCHAR(5000)) + longtasks_containername = Column(VARCHAR(5000)) setnodeurlbasedattribute_id = Column(BigInteger) setnodeurlbasedattribute_name = Column(VARCHAR(5000)) setnodeurlbasedattribute_value = Column(VARCHAR(5000)) diff --git a/ee/connectors/db/utils.py b/ee/connectors/db/utils.py index 7c268c6b3..a51806289 100644 --- a/ee/connectors/db/utils.py +++ b/ee/connectors/db/utils.py @@ -336,6 +336,7 @@ def get_df_from_batch(batch, level): df = pd.DataFrame([b.__dict__ for b in batch], columns=detailed_events_col) if level == 'sessions': df = pd.DataFrame([b.__dict__ for b in batch], columns=sessions_col) + print('if error is after me, then problem is later') try: df = df.drop('_sa_instance_state', axis=1) diff --git a/ee/connectors/handler.py b/ee/connectors/handler.py index 5167c7800..6f6316545 100644 --- a/ee/connectors/handler.py +++ b/ee/connectors/handler.py @@ -49,6 +49,14 @@ def handle_normal_message(message: Message) -> Optional[Event]: n.mouseclick_hesitationtime = message.hesitation_time n.mouseclick_id = message.id n.mouseclick_label = message.label + n.mouseclick_selector = message.selector + return n + + if isinstance(message, MouseClickDepricated): + n.mouseclick_hesitationtime = message.hesitation_time + n.mouseclick_id = message.id + n.mouseclick_label = message.label + n.mouseclick_selector = '' return n if isinstance(message, PageEvent): @@ -132,6 +140,18 @@ def handle_session(n: Session, message: Message) -> Optional[Session]: pass return n + if isinstance(message, BatchMeta): + n.batchmeta_page_no = message.page_no + n.batchmeta_first_index = message.first_index + n.batchmeta_timestamp = message.timestamp + return n + + # if isinstance(message, IOSBatchMeta): + # n.iosbatchmeta_page_no = message.page_no + # n.iosbatchmeta_first_index = message.first_index + # n.iosbatchmeta_timestamp = message.timestamp + # return n + if isinstance(message, ConnectionInformation): n.connection_effective_bandwidth = message.downlink n.connection_type = message.type @@ -224,7 +244,7 @@ def handle_session(n: Session, message: Message) -> Optional[Session]: n.inputs_count = 1 return n - if isinstance(message, MouseClick): + if isinstance(message, MouseClickDepricated): try: n.inputs_count += 1 except TypeError: @@ -278,6 +298,11 @@ def handle_message(message: Message) -> Optional[DetailedEvent]: n.sessionstart_usercountry = message.user_country return n + if isinstance(message, CreateIFrameDocument): + n.create_iframedocument_frame_id = message.frame_id + n.create_iframedocument_id = message.id + return n + if isinstance(message, SetViewportSize): n.setviewportsize_width = message.width n.setviewportsize_height = message.height @@ -360,6 +385,12 @@ def handle_message(message: Message) -> Optional[DetailedEvent]: n.metadata_value = message.value return n + if isinstance(message, BatchMeta): + n.batchmeta_page_no = message.page_no + n.batchmeta_first_index = message.first_index + n.batchmeta_timestamp = message.timestamp + return n + if isinstance(message, PerformanceTrack): n.performancetrack_frames = message.frames n.performancetrack_ticks = message.ticks @@ -470,6 +501,12 @@ def handle_message(message: Message) -> Optional[DetailedEvent]: # n.cssinsertrule_rule = message.rule # n.cssinsertrule_index = message.index # return n + + # if isinstance(message, CSSInsertRuleURLBased): + # n.cssinsertrule_urlbased_id = message.id + # n.cssinsertrule_urlbased_rule = message.rule + # n.cssinsertrule_urlbased_index = message.index + # n.cssinsertrule_urlbased_base_url = message.base_url # # if isinstance(message, CSSDeleteRule): # n.cssdeleterule_stylesheetid = message.id @@ -485,6 +522,17 @@ def handle_message(message: Message) -> Optional[DetailedEvent]: n.fetch_duration = message.duration return n + if isinstance(message, FetchEvent): + n.fetch_event_message_id = message.message_id + n.fetch_event_timestamp = message.timestamp + n.fetch_event_method = message.method + n.fetch_event_url = message.url + n.fetch_event_request = message.request + n.fetch_event_response = message.response + n.fetch_event_status = message.status + n.fetch_event_duration = message.duration + return n + if isinstance(message, Profiler): n.profiler_name = message.name n.profiler_duration = message.duration @@ -513,6 +561,14 @@ def handle_message(message: Message) -> Optional[DetailedEvent]: n.mouseclick_id = message.id n.mouseclick_hesitationtime = message.hesitation_time n.mouseclick_label = message.label + n.mouseclick_selector = message.selector + return n + + if isinstance(message, MouseClickDepricated): + n.mouseclick_id = message.id + n.mouseclick_hesitationtime = message.hesitation_time + n.mouseclick_label = message.label + n.mouseclick_selector = '' return n if isinstance(message, SetPageLocation): @@ -572,6 +628,10 @@ def handle_message(message: Message) -> Optional[DetailedEvent]: n.pageclose = True return n + if isinstance(message, AssetCache): + n.asset_cache_url = message.url + return n + if isinstance(message, IOSSessionStart): n.iossessionstart_timestamp = message.timestamp n.iossessionstart_projectid = message.project_id @@ -596,6 +656,12 @@ def handle_message(message: Message) -> Optional[DetailedEvent]: n.iosmetadata_value = message.value return n + if isinstance(message, IOSBatchMeta): + n.iosbatchmeta_page_no = message.page_no + n.iosbatchmeta_first_index = message.first_index + n.iosbatchmeta_timestamp = message.timestamp + return n + if isinstance(message, IOSUserID): n.iosuserid_timestamp = message.timestamp n.iosuserid_length = message.length @@ -608,6 +674,13 @@ def handle_message(message: Message) -> Optional[DetailedEvent]: n.iosuseranonymousid_value = message.value return n + if isinstance(message, IOSScreenEnter): + n.iosscreenenter_timestamp = message.timestamp + n.iosscreenenter_length = message.length + n.iosscreenenter_title = message.title + n.iosscreenenter_view_name = message.view_name + return n + if isinstance(message, IOSScreenLeave): n.iosscreenleave_timestamp = message.timestamp n.iosscreenleave_length = message.length @@ -615,6 +688,30 @@ def handle_message(message: Message) -> Optional[DetailedEvent]: n.iosscreenleave_viewname = message.view_name return n + if isinstance(message, IOSScreenChanges): + n.iosscreenchanges_timestamp = message.timestamp + n.iosscreenchanges_length = message.length + n.iosscreenchanges_x = message.x + n.iosscreenchanges_y = message.y + n.iosscreenchanges_width = message.width + n.iosscreenchanges_height = message.height + return n + + if isinstance(message, IOSClickEvent): + n.iosclickevent_timestamp = message.timestamp + n.iosclickevent_length = message.length + n.iosclickevent_label = message.label + n.iosclickevent_x = message.x + n.iosclickevent_y = message.y + return n + + if isinstance(message, IOSInputEvent): + n.iosinputevent_timestamp = message.timestamp + n.iosinputevent_length = message.length + n.iosinputevent_value_masked = message.value_masked + n.iosinputevent_label = message.label + return n + if isinstance(message, IOSLog): n.ioslog_timestamp = message.timestamp n.ioslog_length = message.length @@ -622,12 +719,54 @@ def handle_message(message: Message) -> Optional[DetailedEvent]: n.ioslog_content = message.content return n + if isinstance(message, IOSNetworkCall): + n.iosnetworkcall_timestamp = message.timestamp + n.iosnetworkcall_length = message.length + n.iosnetworkcall_duration = message.duration + n.iosnetworkcall_headers = message.headers + n.iosnetworkcall_body = message.body + n.iosnetworkcall_url = message.url + n.iosnetworkcall_success = message.success + n.iosnetworkcall_method = message.method + n.iosnetworkcall_status = message.status + return n + + if isinstance(message, IOSIssueEvent): + n.iosissueevent_timestamp = message.timestamp + n.iosissueevent_type = message.type + n.iosissueevent_context_string = message.context_string + n.iosissueevent_context = message.context + n.iosissueevent_payload = message.payload + return n + + if isinstance(message, IOSCustomEvent): + n.ioscustomevent_timestamp = message.timestamp + n.ioscustomevent_length = message.length + n.ioscustomevent_name = message.name + n.ioscustomevent_payload = message.payload + return n + if isinstance(message, IOSInternalError): n.iosinternalerror_timestamp = message.timestamp n.iosinternalerror_length = message.length n.iosinternalerror_content = message.content return n + if isinstance(message, IOSCrash): + n.ioscrash_timestamp = message.timestamp + n.ioscrash_length = message.length + n.ioscrash_name = message.name + n.ioscrash_reason = message.reason + n.ioscrash_stacktrace = message.stacktrace + return n + + if isinstance(message, IOSPerformanceEvent): + n.iosperformanceevent_timestamp = message.timestamp + n.iosperformanceevent_length = message.length + n.iosperformanceevent_name = message.name + n.iosperformanceevent_value = message.value + return n + if isinstance(message, IOSPerformanceAggregated): n.iosperformanceaggregated_timestampstart = message.timestamp_start n.iosperformanceaggregated_timestampend = message.timestamp_end diff --git a/ee/connectors/msgcodec/codec.py b/ee/connectors/msgcodec/codec.py index e26653478..7e1aabbf9 100644 --- a/ee/connectors/msgcodec/codec.py +++ b/ee/connectors/msgcodec/codec.py @@ -687,7 +687,7 @@ class MessageCodec(Codec): ) if message_id == 98: - return IOSSCreenEnter( + return IOSScreenEnter( timestamp=self.read_uint(reader), length=self.read_uint(reader), title=self.read_string(reader), From 02cbeca7712701ee816afdca0c936db52d35e1a9 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Fri, 15 Jul 2022 17:15:56 +0200 Subject: [PATCH 07/33] deleted some prints --- ee/connectors/db/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ee/connectors/db/utils.py b/ee/connectors/db/utils.py index a51806289..7c268c6b3 100644 --- a/ee/connectors/db/utils.py +++ b/ee/connectors/db/utils.py @@ -336,7 +336,6 @@ def get_df_from_batch(batch, level): df = pd.DataFrame([b.__dict__ for b in batch], columns=detailed_events_col) if level == 'sessions': df = pd.DataFrame([b.__dict__ for b in batch], columns=sessions_col) - print('if error is after me, then problem is later') try: df = df.drop('_sa_instance_state', axis=1) From de1977b0de40548b4c8d2ff329dec36d4fe0915d Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Mon, 18 Jul 2022 14:38:14 +0200 Subject: [PATCH 08/33] Changed types of some messages and force type when pandas fails to recognize column type --- ee/connectors/bigquery_utils/create_table.py | 20 ++++++++++---------- ee/connectors/db/utils.py | 9 +++++++++ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/ee/connectors/bigquery_utils/create_table.py b/ee/connectors/bigquery_utils/create_table.py index 9fd7ef1cc..5dc4e9b9a 100644 --- a/ee/connectors/bigquery_utils/create_table.py +++ b/ee/connectors/bigquery_utils/create_table.py @@ -351,18 +351,18 @@ def create_events_detailed_table(creds_file, table_id): bigquery.SchemaField("setnodeurlbasedattribute_value", "STRING"), bigquery.SchemaField("setnodeurlbasedattribute_baseurl", "STRING"), bigquery.SchemaField("setstyledata_id", "FLOAT64"), - bigquery.SchemaField("setstyledata_data", "STRING"), #CAUSING ISSUES - bigquery.SchemaField("setstyledata_baseurl", "STRING"), #CAUSING ISSUES + bigquery.SchemaField("setstyledata_data", "STRING"), + bigquery.SchemaField("setstyledata_baseurl", "STRING"), bigquery.SchemaField("issueevent_messageid", "FLOAT64"), bigquery.SchemaField("issueevent_timestamp", "FLOAT64"), - bigquery.SchemaField("issueevent_type", "FLOAT64"), - bigquery.SchemaField("issueevent_contextstring", "FLOAT64"), - bigquery.SchemaField("issueevent_context", "FLOAT64"), - bigquery.SchemaField("issueevent_payload", "FLOAT64"), - bigquery.SchemaField("technicalinfo_type", "FLOAT64"), - bigquery.SchemaField("technicalinfo_value", "FLOAT64"), - bigquery.SchemaField("customissue_name", "FLOAT64"), - bigquery.SchemaField("customissue_payload", "FLOAT64"), + bigquery.SchemaField("issueevent_type", "STRING"), + bigquery.SchemaField("issueevent_contextstring", "STRING"), + bigquery.SchemaField("issueevent_context", "STRING"), + bigquery.SchemaField("issueevent_payload", "STRING"), + bigquery.SchemaField("technicalinfo_type", "STRING"), + bigquery.SchemaField("technicalinfo_value", "STRING"), + bigquery.SchemaField("customissue_name", "STRING"), + bigquery.SchemaField("customissue_payload", "STRING"), bigquery.SchemaField("pageclose", "FLOAT64"), bigquery.SchemaField("received_at", "INT64", mode="REQUIRED"), bigquery.SchemaField("batch_order_number", "INT64", mode="REQUIRED") diff --git a/ee/connectors/db/utils.py b/ee/connectors/db/utils.py index 7c268c6b3..cdc650884 100644 --- a/ee/connectors/db/utils.py +++ b/ee/connectors/db/utils.py @@ -355,8 +355,17 @@ def get_df_from_batch(batch, level): df['issues'] = df['issues'].fillna('') df['urls'] = df['urls'].fillna('') + forced_type_cols_str = ['setnodeurlbasedattribute_name', 'setnodeurlbasedattribute_value', + 'setnodeurlbasedattribute_baseurl', 'setstyledata_data', 'setstyledata_baseurl', + 'customissue_payload', 'customissue_name', 'technicalinfo_value', 'technicalinfo_type', + 'issueevent_payload', 'issueevent_context', 'issueevent_contextstring', 'issueevent_type'] + forced_type_cols_float = ['setnodeurlbasedattribute_id'] for x in df.columns: try: + if x in forced_type_cols_str: + df[x] = df[x].apply(str) + if x in forced_type_cols_float: + df[x] = df[x].apply(float) if df[x].dtype == 'string': df[x] = df[x].str.slice(0, 255) df[x] = df[x].str.replace("|", "") From f51b067ed8db3dd62a764ec053b6aa445810e3c6 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Tue, 19 Jul 2022 11:19:17 +0200 Subject: [PATCH 09/33] Updated PG connector events --- ee/connectors/db/tables.py | 5 + .../sql/postgres_events_detailed.sql | 238 ++++++++++++++++++ 2 files changed, 243 insertions(+) create mode 100644 ee/connectors/sql/postgres_events_detailed.sql diff --git a/ee/connectors/db/tables.py b/ee/connectors/db/tables.py index 0127cbbd1..e3298825f 100644 --- a/ee/connectors/db/tables.py +++ b/ee/connectors/db/tables.py @@ -36,6 +36,11 @@ def create_tables_postgres(db): db.engine.execute(q) print(f"`connector_sessions` table created succesfully.") + #with open(base_path / 'sql' / 'postgres_events_detailed.sql') as f: + # q = f.read() + #db.engine.execute(q) + #print(f"`connector_user_events_detailed` table created succesfully.") + def create_tables_snowflake(db): with open(base_path / 'sql' / 'snowflake_events.sql') as f: diff --git a/ee/connectors/sql/postgres_events_detailed.sql b/ee/connectors/sql/postgres_events_detailed.sql new file mode 100644 index 000000000..092a50ea3 --- /dev/null +++ b/ee/connectors/sql/postgres_events_detailed.sql @@ -0,0 +1,238 @@ +CREATE TABLE IF NOT EXISTS connector_events_detailed +( + sessionid bigint, + clickevent_hesitationtime bigint, + clickevent_label text, + clickevent_messageid bigint, + clickevent_timestamp bigint, + connectioninformation_downlink bigint, + connectioninformation_type text, + consolelog_level text, + consolelog_value text, + cpuissue_duration bigint, + cpuissue_rate bigint, + cpuissue_timestamp bigint, + createdocument boolean, + createelementnode_id bigint, + createelementnode_parentid bigint, + cssdeleterule_index bigint, + cssdeleterule_stylesheetid bigint, + cssinsertrule_index bigint, + cssinsertrule_rule text, + cssinsertrule_stylesheetid bigint, + customevent_messageid bigint, + customevent_name text, + customevent_payload text, + customevent_timestamp bigint, + domdrop_timestamp bigint, + errorevent_message text, + errorevent_messageid bigint, + errorevent_name text, + errorevent_payload text, + errorevent_source text, + errorevent_timestamp bigint, + fetch_duration bigint, + fetch_method text, + fetch_request text, + fetch_response text, + fetch_status bigint, + fetch_timestamp bigint, + fetch_url text, + graphql_operationkind text, + graphql_operationname text, + graphql_response text, + graphql_variables text, + graphqlevent_messageid bigint, + graphqlevent_name text, + graphqlevent_timestamp bigint, + inputevent_label text, + inputevent_messageid bigint, + inputevent_timestamp bigint, + inputevent_value text, + inputevent_valuemasked boolean, + jsexception_message text, + jsexception_name text, + jsexception_payload text, + memoryissue_duration bigint, + memoryissue_rate bigint, + memoryissue_timestamp bigint, + metadata_key text, + metadata_value text, + mobx_payload text, + mobx_type text, + mouseclick_id bigint, + mouseclick_hesitationtime bigint, + mouseclick_label text, + mousemove_x bigint, + mousemove_y bigint, + movenode_id bigint, + movenode_index bigint, + movenode_parentid bigint, + ngrx_action text, + ngrx_duration bigint, + ngrx_state text, + otable_key text, + otable_value text, + pageevent_domcontentloadedeventend bigint, + pageevent_domcontentloadedeventstart bigint, + pageevent_firstcontentfulpaint bigint, + pageevent_firstpaint bigint, + pageevent_loaded boolean, + pageevent_loadeventend bigint, + pageevent_loadeventstart bigint, + pageevent_messageid bigint, + pageevent_referrer text, + pageevent_requeststart bigint, + pageevent_responseend bigint, + pageevent_responsestart bigint, + pageevent_speedindex bigint, + pageevent_timestamp bigint, + pageevent_url text, + pageloadtiming_domcontentloadedeventend bigint, + pageloadtiming_domcontentloadedeventstart bigint, + pageloadtiming_firstcontentfulpaint bigint, + pageloadtiming_firstpaint bigint, + pageloadtiming_loadeventend bigint, + pageloadtiming_loadeventstart bigint, + pageloadtiming_requeststart bigint, + pageloadtiming_responseend bigint, + pageloadtiming_responsestart bigint, + pagerendertiming_speedindex bigint, + pagerendertiming_timetointeractive bigint, + pagerendertiming_visuallycomplete bigint, + performancetrack_frames bigint, + performancetrack_ticks bigint, + performancetrack_totaljsheapsize bigint, + performancetrack_usedjsheapsize bigint, + performancetrackaggr_avgcpu bigint, + performancetrackaggr_avgfps bigint, + performancetrackaggr_avgtotaljsheapsize bigint, + performancetrackaggr_avgusedjsheapsize bigint, + performancetrackaggr_maxcpu bigint, + performancetrackaggr_maxfps bigint, + performancetrackaggr_maxtotaljsheapsize bigint, + performancetrackaggr_maxusedjsheapsize bigint, + performancetrackaggr_mincpu bigint, + performancetrackaggr_minfps bigint, + performancetrackaggr_mintotaljsheapsize bigint, + performancetrackaggr_minusedjsheapsize bigint, + performancetrackaggr_timestampend bigint, + performancetrackaggr_timestampstart bigint, + profiler_args text, + profiler_duration bigint, + profiler_name text, + profiler_result text, + rawcustomevent_name text, + rawcustomevent_payload text, + rawerrorevent_message text, + rawerrorevent_name text, + rawerrorevent_payload text, + rawerrorevent_source text, + rawerrorevent_timestamp bigint, + redux_action text, + redux_duration bigint, + redux_state text, + removenode_id bigint, + removenodeattribute_id bigint, + removenodeattribute_name text, + resourceevent_decodedbodysize bigint, + resourceevent_duration bigint, + resourceevent_encodedbodysize bigint, + resourceevent_headersize bigint, + resourceevent_messageid bigint, + resourceevent_method text, + resourceevent_status bigint, + resourceevent_success boolean, + resourceevent_timestamp bigint, + resourceevent_ttfb bigint, + resourceevent_type text, + resourceevent_url text, + resourcetiming_decodedbodysize bigint, + resourcetiming_duration bigint, + resourcetiming_encodedbodysize bigint, + resourcetiming_headersize bigint, + resourcetiming_initiator text, + resourcetiming_timestamp bigint, + resourcetiming_ttfb bigint, + resourcetiming_url text, + sessiondisconnect boolean, + sessiondisconnect_timestamp bigint, + sessionend boolean, + sessionend_timestamp bigint, + sessionstart_projectid bigint, + sessionstart_revid text, + sessionstart_timestamp bigint, + sessionstart_trackerversion text, + sessionstart_useragent text, + sessionstart_userbrowser text, + sessionstart_userbrowserversion text, + sessionstart_usercountry text, + sessionstart_userdevice text, + sessionstart_userdeviceheapsize bigint, + sessionstart_userdevicememorysize bigint, + sessionstart_userdevicetype text, + sessionstart_useros text, + sessionstart_userosversion text, + sessionstart_useruuid text, + setcssdata_data bigint, + setcssdata_id bigint, + setinputchecked_checked bigint, + setinputchecked_id bigint, + setinputtarget_id bigint, + setinputtarget_label bigint, + setinputvalue_id bigint, + setinputvalue_mask bigint, + setinputvalue_value bigint, + setnodeattribute_id bigint, + setnodeattribute_name bigint, + setnodeattribute_value bigint, + setnodedata_data bigint, + setnodedata_id bigint, + setnodescroll_id bigint, + setnodescroll_x bigint, + setnodescroll_y bigint, + setpagelocation_navigationstart bigint, + setpagelocation_referrer text, + setpagelocation_url text, + setpagevisibility_hidden boolean, + setviewportscroll_x bigint, + setviewportscroll_y bigint, + setviewportsize_height bigint, + setviewportsize_width bigint, + stateaction_type text, + stateactionevent_messageid bigint, + stateactionevent_timestamp bigint, + stateactionevent_type text, + timestamp_timestamp bigint, + useranonymousid_id text, + userid_id text, + vuex_mutation text, + vuex_state text, + longtasks_timestamp bigint, + longtasks_duration bigint, + longtasks_context bigint, + longtasks_containertype bigint, + longtasks_containersrc text, + longtasks_containerid text, + longtasks_containername bigint, + setnodeurlbasedattribute_id bigint, + setnodeurlbasedattribute_name text, + setnodeurlbasedattribute_value text, + setnodeurlbasedattribute_baseurl text, + setstyledata_id bigint, + setstyledata_data text, + setstyledata_baseurl text, + issueevent_messageid bigint, + issueevent_timestamp bigint, + issueevent_type text, + issueevent_contextstring text, + issueevent_context text, + issueevent_payload text, + technicalinfo_type text, + technicalinfo_value text, + customissue_name text, + customissue_payload text, + pageclose bigint, + received_at bigint, + batch_order_number bigint +); From 7abe9f96808484dd3a885a8bb57928830823ad4b Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 20 Jul 2022 10:54:53 +0200 Subject: [PATCH 10/33] Updated CH connector and event types --- ee/connectors/db/tables.py | 10 + ee/connectors/db/utils.py | 28 +- .../sql/clickhouse_events_detailed.sql | 242 ++++++++++++++++++ .../sql/clickhouse_events_detailed_buffer.sql | 238 +++++++++++++++++ 4 files changed, 508 insertions(+), 10 deletions(-) create mode 100644 ee/connectors/sql/clickhouse_events_detailed.sql create mode 100644 ee/connectors/sql/clickhouse_events_detailed_buffer.sql diff --git a/ee/connectors/db/tables.py b/ee/connectors/db/tables.py index e3298825f..db1c2ca42 100644 --- a/ee/connectors/db/tables.py +++ b/ee/connectors/db/tables.py @@ -24,6 +24,16 @@ def create_tables_clickhouse(db): db.engine.execute(q) print(f"`connector_sessions_buffer` table created succesfully.") + #with open(base_path / 'sql' / 'clickhouse_events_detailed.sql') as f: + # q = f.read() + #db.engine.execute(q) + #print(f"`connector_user_events_detailed` table created succesfully.") + + #with open(base_path / 'sql' / 'clickhouse_events_detailed_buffer.sql') as f: + # q = f.read() + #db.engine.execute(q) + #print(f"`connector_user_events_detailed_buffer` table created succesfully.") + def create_tables_postgres(db): with open(base_path / 'sql' / 'postgres_events.sql') as f: diff --git a/ee/connectors/db/utils.py b/ee/connectors/db/utils.py index cdc650884..698a6043a 100644 --- a/ee/connectors/db/utils.py +++ b/ee/connectors/db/utils.py @@ -264,7 +264,24 @@ dtypes_detailed_events = { "vuex_mutation": "object", "vuex_state": "string", "received_at": "Int64", - "batch_order_number": "Int64" + "batch_order_number": "Int64", + + #NEW + 'setnodeurlbasedattribute_id': 'Int64', + 'setnodeurlbasedattribute_name': 'string', + 'setnodeurlbasedattribute_value': 'string', + 'setnodeurlbasedattribute_baseurl': 'string', + 'setstyledata_id': 'Int64', + 'setstyledata_data': 'string', + 'setstyledata_baseurl': 'string', + 'customissue_payload': 'string', + 'customissue_name': 'string', + 'technicalinfo_value': 'string', + 'technicalinfo_type': 'string', + 'issueevent_payload': 'string', + 'issueevent_context': 'string', + 'issueevent_contextstring': 'string', + 'issueevent_type': 'string' } dtypes_sessions = {'sessionid': 'Int64', 'user_agent': 'string', @@ -355,17 +372,8 @@ def get_df_from_batch(batch, level): df['issues'] = df['issues'].fillna('') df['urls'] = df['urls'].fillna('') - forced_type_cols_str = ['setnodeurlbasedattribute_name', 'setnodeurlbasedattribute_value', - 'setnodeurlbasedattribute_baseurl', 'setstyledata_data', 'setstyledata_baseurl', - 'customissue_payload', 'customissue_name', 'technicalinfo_value', 'technicalinfo_type', - 'issueevent_payload', 'issueevent_context', 'issueevent_contextstring', 'issueevent_type'] - forced_type_cols_float = ['setnodeurlbasedattribute_id'] for x in df.columns: try: - if x in forced_type_cols_str: - df[x] = df[x].apply(str) - if x in forced_type_cols_float: - df[x] = df[x].apply(float) if df[x].dtype == 'string': df[x] = df[x].str.slice(0, 255) df[x] = df[x].str.replace("|", "") diff --git a/ee/connectors/sql/clickhouse_events_detailed.sql b/ee/connectors/sql/clickhouse_events_detailed.sql new file mode 100644 index 000000000..f029184e3 --- /dev/null +++ b/ee/connectors/sql/clickhouse_events_detailed.sql @@ -0,0 +1,242 @@ +CREATE TABLE IF NOT EXISTS connector_events_detailed +( + sessionid UInt64, + clickevent_hesitationtime Nullable(UInt64), + clickevent_label Nullable(String), + clickevent_messageid Nullable(UInt64), + clickevent_timestamp Nullable(UInt64), + connectioninformation_downlink Nullable(UInt64), + connectioninformation_type Nullable(String), + consolelog_level Nullable(String), + consolelog_value Nullable(String), + cpuissue_duration Nullable(UInt64), + cpuissue_rate Nullable(UInt64), + cpuissue_timestamp Nullable(UInt64), + createdocument Nullable(Bool), + createelementnode_id Nullable(UInt64), + createelementnode_parentid Nullable(UInt64), + cssdeleterule_index Nullable(UInt64), + cssdeleterule_stylesheetid Nullable(UInt64), + cssinsertrule_index Nullable(UInt64), + cssinsertrule_rule Nullable(String), + cssinsertrule_stylesheetid Nullable(UInt64), + customevent_messageid Nullable(UInt64), + customevent_name Nullable(String), + customevent_payload Nullable(String), + customevent_timestamp Nullable(UInt64), + domdrop_timestamp Nullable(UInt64), + errorevent_message Nullable(String), + errorevent_messageid Nullable(UInt64), + errorevent_name Nullable(String), + errorevent_payload Nullable(String), + errorevent_source Nullable(String), + errorevent_timestamp Nullable(UInt64), + fetch_duration Nullable(UInt64), + fetch_method Nullable(String), + fetch_request Nullable(String), + fetch_response Nullable(String), + fetch_status Nullable(UInt64), + fetch_timestamp Nullable(UInt64), + fetch_url Nullable(String), + graphql_operationkind Nullable(String), + graphql_operationname Nullable(String), + graphql_response Nullable(String), + graphql_variables Nullable(String), + graphqlevent_messageid Nullable(UInt64), + graphqlevent_name Nullable(String), + graphqlevent_timestamp Nullable(UInt64), + inputevent_label Nullable(String), + inputevent_messageid Nullable(UInt64), + inputevent_timestamp Nullable(UInt64), + inputevent_value Nullable(String), + inputevent_valuemasked Nullable(Bool), + jsexception_message Nullable(String), + jsexception_name Nullable(String), + jsexception_payload Nullable(String), + memoryissue_duration Nullable(UInt64), + memoryissue_rate Nullable(UInt64), + memoryissue_timestamp Nullable(UInt64), + metadata_key Nullable(String), + metadata_value Nullable(String), + mobx_payload Nullable(String), + mobx_type Nullable(String), + mouseclick_id Nullable(UInt64), + mouseclick_hesitationtime Nullable(UInt64), + mouseclick_label Nullable(String), + mousemove_x Nullable(UInt64), + mousemove_y Nullable(UInt64), + movenode_id Nullable(UInt64), + movenode_index Nullable(UInt64), + movenode_parentid Nullable(UInt64), + ngrx_action Nullable(String), + ngrx_duration Nullable(UInt64), + ngrx_state Nullable(String), + otable_key Nullable(String), + otable_value Nullable(String), + pageevent_domcontentloadedeventend Nullable(UInt64), + pageevent_domcontentloadedeventstart Nullable(UInt64), + pageevent_firstcontentfulpaint Nullable(UInt64), + pageevent_firstpaint Nullable(UInt64), + pageevent_loaded Nullable(Bool), + pageevent_loadeventend Nullable(UInt64), + pageevent_loadeventstart Nullable(UInt64), + pageevent_messageid Nullable(UInt64), + pageevent_referrer Nullable(String), + pageevent_requeststart Nullable(UInt64), + pageevent_responseend Nullable(UInt64), + pageevent_responsestart Nullable(UInt64), + pageevent_speedindex Nullable(UInt64), + pageevent_timestamp Nullable(UInt64), + pageevent_url Nullable(String), + pageloadtiming_domcontentloadedeventend Nullable(UInt64), + pageloadtiming_domcontentloadedeventstart Nullable(UInt64), + pageloadtiming_firstcontentfulpaint Nullable(UInt64), + pageloadtiming_firstpaint Nullable(UInt64), + pageloadtiming_loadeventend Nullable(UInt64), + pageloadtiming_loadeventstart Nullable(UInt64), + pageloadtiming_requeststart Nullable(UInt64), + pageloadtiming_responseend Nullable(UInt64), + pageloadtiming_responsestart Nullable(UInt64), + pagerendertiming_speedindex Nullable(UInt64), + pagerendertiming_timetointeractive Nullable(UInt64), + pagerendertiming_visuallycomplete Nullable(UInt64), + performancetrack_frames Nullable(Int64), + performancetrack_ticks Nullable(Int64), + performancetrack_totaljsheapsize Nullable(UInt64), + performancetrack_usedjsheapsize Nullable(UInt64), + performancetrackaggr_avgcpu Nullable(UInt64), + performancetrackaggr_avgfps Nullable(UInt64), + performancetrackaggr_avgtotaljsheapsize Nullable(UInt64), + performancetrackaggr_avgusedjsheapsize Nullable(UInt64), + performancetrackaggr_maxcpu Nullable(UInt64), + performancetrackaggr_maxfps Nullable(UInt64), + performancetrackaggr_maxtotaljsheapsize Nullable(UInt64), + performancetrackaggr_maxusedjsheapsize Nullable(UInt64), + performancetrackaggr_mincpu Nullable(UInt64), + performancetrackaggr_minfps Nullable(UInt64), + performancetrackaggr_mintotaljsheapsize Nullable(UInt64), + performancetrackaggr_minusedjsheapsize Nullable(UInt64), + performancetrackaggr_timestampend Nullable(UInt64), + performancetrackaggr_timestampstart Nullable(UInt64), + profiler_args Nullable(String), + profiler_duration Nullable(UInt64), + profiler_name Nullable(String), + profiler_result Nullable(String), + rawcustomevent_name Nullable(String), + rawcustomevent_payload Nullable(String), + rawerrorevent_message Nullable(String), + rawerrorevent_name Nullable(String), + rawerrorevent_payload Nullable(String), + rawerrorevent_source Nullable(String), + rawerrorevent_timestamp Nullable(UInt64), + redux_action Nullable(String), + redux_duration Nullable(UInt64), + redux_state Nullable(String), + removenode_id Nullable(UInt64), + removenodeattribute_id Nullable(UInt64), + removenodeattribute_name Nullable(String), + resourceevent_decodedbodysize Nullable(UInt64), + resourceevent_duration Nullable(UInt64), + resourceevent_encodedbodysize Nullable(UInt64), + resourceevent_headersize Nullable(UInt64), + resourceevent_messageid Nullable(UInt64), + resourceevent_method Nullable(String), + resourceevent_status Nullable(UInt64), + resourceevent_success Nullable(Bool), + resourceevent_timestamp Nullable(UInt64), + resourceevent_ttfb Nullable(UInt64), + resourceevent_type Nullable(String), + resourceevent_url Nullable(String), + resourcetiming_decodedbodysize Nullable(UInt64), + resourcetiming_duration Nullable(UInt64), + resourcetiming_encodedbodysize Nullable(UInt64), + resourcetiming_headersize Nullable(UInt64), + resourcetiming_initiator Nullable(String), + resourcetiming_timestamp Nullable(UInt64), + resourcetiming_ttfb Nullable(UInt64), + resourcetiming_url Nullable(String), + sessiondisconnect Nullable(Bool), + sessiondisconnect_timestamp Nullable(UInt64), + sessionend Nullable(Bool), + sessionend_timestamp Nullable(UInt64), + sessionstart_projectid Nullable(UInt64), + sessionstart_revid Nullable(String), + sessionstart_timestamp Nullable(UInt64), + sessionstart_trackerversion Nullable(String), + sessionstart_useragent Nullable(String), + sessionstart_userbrowser Nullable(String), + sessionstart_userbrowserversion Nullable(String), + sessionstart_usercountry Nullable(String), + sessionstart_userdevice Nullable(String), + sessionstart_userdeviceheapsize Nullable(UInt64), + sessionstart_userdevicememorysize Nullable(UInt64), + sessionstart_userdevicetype Nullable(String), + sessionstart_useros Nullable(String), + sessionstart_userosversion Nullable(String), + sessionstart_useruuid Nullable(String), + setcssdata_data Nullable(UInt64), + setcssdata_id Nullable(UInt64), + setinputchecked_checked Nullable(UInt64), + setinputchecked_id Nullable(UInt64), + setinputtarget_id Nullable(UInt64), + setinputtarget_label Nullable(UInt64), + setinputvalue_id Nullable(UInt64), + setinputvalue_mask Nullable(UInt64), + setinputvalue_value Nullable(UInt64), + setnodeattribute_id Nullable(UInt64), + setnodeattribute_name Nullable(UInt64), + setnodeattribute_value Nullable(UInt64), + setnodedata_data Nullable(UInt64), + setnodedata_id Nullable(UInt64), + setnodescroll_id Nullable(UInt64), + setnodescroll_x Nullable(UInt64), + setnodescroll_y Nullable(UInt64), + setpagelocation_navigationstart Nullable(UInt64), + setpagelocation_referrer Nullable(String), + setpagelocation_url Nullable(String), + setpagevisibility_hidden Nullable(Bool), + setviewportscroll_x Nullable(UInt64), + setviewportscroll_y Nullable(UInt64), + setviewportsize_height Nullable(UInt64), + setviewportsize_width Nullable(UInt64), + stateaction_type Nullable(String), + stateactionevent_messageid Nullable(UInt64), + stateactionevent_timestamp Nullable(UInt64), + stateactionevent_type Nullable(String), + timestamp_timestamp Nullable(UInt64), + useranonymousid_id Nullable(String), + userid_id Nullable(String), + vuex_mutation Nullable(String), + vuex_state Nullable(String), + longtasks_timestamp Nullable(UInt64), + longtasks_duration Nullable(UInt64), + longtasks_context Nullable(UInt64), + longtasks_containertype Nullable(UInt64), + longtasks_containersrc Nullable(String), + longtasks_containerid Nullable(String), + longtasks_containername Nullable(UInt64), + setnodeurlbasedattribute_id Nullable(UInt64), + setnodeurlbasedattribute_name Nullable(String), + setnodeurlbasedattribute_value Nullable(String), + setnodeurlbasedattribute_baseurl Nullable(String), + setstyledata_id Nullable(UInt64), + setstyledata_data Nullable(String), + setstyledata_baseurl Nullable(String), + issueevent_messageid Nullable(UInt64), + issueevent_timestamp Nullable(UInt64), + issueevent_type Nullable(String), + issueevent_contextstring Nullable(String), + issueevent_context Nullable(String), + issueevent_payload Nullable(String), + technicalinfo_type Nullable(String), + technicalinfo_value Nullable(String), + customissue_name Nullable(String), + customissue_payload Nullable(String), + pageclose Nullable(UInt64), + received_at UInt64, + batch_order_number UInt64 +) ENGINE = MergeTree() +PARTITION BY intDiv(received_at, 100000) +ORDER BY (received_at, batch_order_number, sessionid) +PRIMARY KEY (received_at) +SETTINGS use_minimalistic_part_header_in_zookeeper=1, index_granularity=1000; diff --git a/ee/connectors/sql/clickhouse_events_detailed_buffer.sql b/ee/connectors/sql/clickhouse_events_detailed_buffer.sql new file mode 100644 index 000000000..fdaaf1520 --- /dev/null +++ b/ee/connectors/sql/clickhouse_events_detailed_buffer.sql @@ -0,0 +1,238 @@ +CREATE TABLE IF NOT EXISTS connector_events_detailed_buffer +( + sessionid UInt64, + clickevent_hesitationtime Nullable(UInt64), + clickevent_label Nullable(String), + clickevent_messageid Nullable(UInt64), + clickevent_timestamp Nullable(UInt64), + connectioninformation_downlink Nullable(UInt64), + connectioninformation_type Nullable(String), + consolelog_level Nullable(String), + consolelog_value Nullable(String), + cpuissue_duration Nullable(UInt64), + cpuissue_rate Nullable(UInt64), + cpuissue_timestamp Nullable(UInt64), + createdocument Nullable(Bool), + createelementnode_id Nullable(UInt64), + createelementnode_parentid Nullable(UInt64), + cssdeleterule_index Nullable(UInt64), + cssdeleterule_stylesheetid Nullable(UInt64), + cssinsertrule_index Nullable(UInt64), + cssinsertrule_rule Nullable(String), + cssinsertrule_stylesheetid Nullable(UInt64), + customevent_messageid Nullable(UInt64), + customevent_name Nullable(String), + customevent_payload Nullable(String), + customevent_timestamp Nullable(UInt64), + domdrop_timestamp Nullable(UInt64), + errorevent_message Nullable(String), + errorevent_messageid Nullable(UInt64), + errorevent_name Nullable(String), + errorevent_payload Nullable(String), + errorevent_source Nullable(String), + errorevent_timestamp Nullable(UInt64), + fetch_duration Nullable(UInt64), + fetch_method Nullable(String), + fetch_request Nullable(String), + fetch_response Nullable(String), + fetch_status Nullable(UInt64), + fetch_timestamp Nullable(UInt64), + fetch_url Nullable(String), + graphql_operationkind Nullable(String), + graphql_operationname Nullable(String), + graphql_response Nullable(String), + graphql_variables Nullable(String), + graphqlevent_messageid Nullable(UInt64), + graphqlevent_name Nullable(String), + graphqlevent_timestamp Nullable(UInt64), + inputevent_label Nullable(String), + inputevent_messageid Nullable(UInt64), + inputevent_timestamp Nullable(UInt64), + inputevent_value Nullable(String), + inputevent_valuemasked Nullable(Bool), + jsexception_message Nullable(String), + jsexception_name Nullable(String), + jsexception_payload Nullable(String), + memoryissue_duration Nullable(UInt64), + memoryissue_rate Nullable(UInt64), + memoryissue_timestamp Nullable(UInt64), + metadata_key Nullable(String), + metadata_value Nullable(String), + mobx_payload Nullable(String), + mobx_type Nullable(String), + mouseclick_id Nullable(UInt64), + mouseclick_hesitationtime Nullable(UInt64), + mouseclick_label Nullable(String), + mousemove_x Nullable(UInt64), + mousemove_y Nullable(UInt64), + movenode_id Nullable(UInt64), + movenode_index Nullable(UInt64), + movenode_parentid Nullable(UInt64), + ngrx_action Nullable(String), + ngrx_duration Nullable(UInt64), + ngrx_state Nullable(String), + otable_key Nullable(String), + otable_value Nullable(String), + pageevent_domcontentloadedeventend Nullable(UInt64), + pageevent_domcontentloadedeventstart Nullable(UInt64), + pageevent_firstcontentfulpaint Nullable(UInt64), + pageevent_firstpaint Nullable(UInt64), + pageevent_loaded Nullable(Bool), + pageevent_loadeventend Nullable(UInt64), + pageevent_loadeventstart Nullable(UInt64), + pageevent_messageid Nullable(UInt64), + pageevent_referrer Nullable(String), + pageevent_requeststart Nullable(UInt64), + pageevent_responseend Nullable(UInt64), + pageevent_responsestart Nullable(UInt64), + pageevent_speedindex Nullable(UInt64), + pageevent_timestamp Nullable(UInt64), + pageevent_url Nullable(String), + pageloadtiming_domcontentloadedeventend Nullable(UInt64), + pageloadtiming_domcontentloadedeventstart Nullable(UInt64), + pageloadtiming_firstcontentfulpaint Nullable(UInt64), + pageloadtiming_firstpaint Nullable(UInt64), + pageloadtiming_loadeventend Nullable(UInt64), + pageloadtiming_loadeventstart Nullable(UInt64), + pageloadtiming_requeststart Nullable(UInt64), + pageloadtiming_responseend Nullable(UInt64), + pageloadtiming_responsestart Nullable(UInt64), + pagerendertiming_speedindex Nullable(UInt64), + pagerendertiming_timetointeractive Nullable(UInt64), + pagerendertiming_visuallycomplete Nullable(UInt64), + performancetrack_frames Nullable(Int64), + performancetrack_ticks Nullable(Int64), + performancetrack_totaljsheapsize Nullable(UInt64), + performancetrack_usedjsheapsize Nullable(UInt64), + performancetrackaggr_avgcpu Nullable(UInt64), + performancetrackaggr_avgfps Nullable(UInt64), + performancetrackaggr_avgtotaljsheapsize Nullable(UInt64), + performancetrackaggr_avgusedjsheapsize Nullable(UInt64), + performancetrackaggr_maxcpu Nullable(UInt64), + performancetrackaggr_maxfps Nullable(UInt64), + performancetrackaggr_maxtotaljsheapsize Nullable(UInt64), + performancetrackaggr_maxusedjsheapsize Nullable(UInt64), + performancetrackaggr_mincpu Nullable(UInt64), + performancetrackaggr_minfps Nullable(UInt64), + performancetrackaggr_mintotaljsheapsize Nullable(UInt64), + performancetrackaggr_minusedjsheapsize Nullable(UInt64), + performancetrackaggr_timestampend Nullable(UInt64), + performancetrackaggr_timestampstart Nullable(UInt64), + profiler_args Nullable(String), + profiler_duration Nullable(UInt64), + profiler_name Nullable(String), + profiler_result Nullable(String), + rawcustomevent_name Nullable(String), + rawcustomevent_payload Nullable(String), + rawerrorevent_message Nullable(String), + rawerrorevent_name Nullable(String), + rawerrorevent_payload Nullable(String), + rawerrorevent_source Nullable(String), + rawerrorevent_timestamp Nullable(UInt64), + redux_action Nullable(String), + redux_duration Nullable(UInt64), + redux_state Nullable(String), + removenode_id Nullable(UInt64), + removenodeattribute_id Nullable(UInt64), + removenodeattribute_name Nullable(String), + resourceevent_decodedbodysize Nullable(UInt64), + resourceevent_duration Nullable(UInt64), + resourceevent_encodedbodysize Nullable(UInt64), + resourceevent_headersize Nullable(UInt64), + resourceevent_messageid Nullable(UInt64), + resourceevent_method Nullable(String), + resourceevent_status Nullable(UInt64), + resourceevent_success Nullable(Bool), + resourceevent_timestamp Nullable(UInt64), + resourceevent_ttfb Nullable(UInt64), + resourceevent_type Nullable(String), + resourceevent_url Nullable(String), + resourcetiming_decodedbodysize Nullable(UInt64), + resourcetiming_duration Nullable(UInt64), + resourcetiming_encodedbodysize Nullable(UInt64), + resourcetiming_headersize Nullable(UInt64), + resourcetiming_initiator Nullable(String), + resourcetiming_timestamp Nullable(UInt64), + resourcetiming_ttfb Nullable(UInt64), + resourcetiming_url Nullable(String), + sessiondisconnect Nullable(Bool), + sessiondisconnect_timestamp Nullable(UInt64), + sessionend Nullable(Bool), + sessionend_timestamp Nullable(UInt64), + sessionstart_projectid Nullable(UInt64), + sessionstart_revid Nullable(String), + sessionstart_timestamp Nullable(UInt64), + sessionstart_trackerversion Nullable(String), + sessionstart_useragent Nullable(String), + sessionstart_userbrowser Nullable(String), + sessionstart_userbrowserversion Nullable(String), + sessionstart_usercountry Nullable(String), + sessionstart_userdevice Nullable(String), + sessionstart_userdeviceheapsize Nullable(UInt64), + sessionstart_userdevicememorysize Nullable(UInt64), + sessionstart_userdevicetype Nullable(String), + sessionstart_useros Nullable(String), + sessionstart_userosversion Nullable(String), + sessionstart_useruuid Nullable(String), + setcssdata_data Nullable(UInt64), + setcssdata_id Nullable(UInt64), + setinputchecked_checked Nullable(UInt64), + setinputchecked_id Nullable(UInt64), + setinputtarget_id Nullable(UInt64), + setinputtarget_label Nullable(UInt64), + setinputvalue_id Nullable(UInt64), + setinputvalue_mask Nullable(UInt64), + setinputvalue_value Nullable(UInt64), + setnodeattribute_id Nullable(UInt64), + setnodeattribute_name Nullable(UInt64), + setnodeattribute_value Nullable(UInt64), + setnodedata_data Nullable(UInt64), + setnodedata_id Nullable(UInt64), + setnodescroll_id Nullable(UInt64), + setnodescroll_x Nullable(UInt64), + setnodescroll_y Nullable(UInt64), + setpagelocation_navigationstart Nullable(UInt64), + setpagelocation_referrer Nullable(String), + setpagelocation_url Nullable(String), + setpagevisibility_hidden Nullable(Bool), + setviewportscroll_x Nullable(UInt64), + setviewportscroll_y Nullable(UInt64), + setviewportsize_height Nullable(UInt64), + setviewportsize_width Nullable(UInt64), + stateaction_type Nullable(String), + stateactionevent_messageid Nullable(UInt64), + stateactionevent_timestamp Nullable(UInt64), + stateactionevent_type Nullable(String), + timestamp_timestamp Nullable(UInt64), + useranonymousid_id Nullable(String), + userid_id Nullable(String), + vuex_mutation Nullable(String), + vuex_state Nullable(String), + longtasks_timestamp Nullable(UInt64), + longtasks_duration Nullable(UInt64), + longtasks_context Nullable(UInt64), + longtasks_containertype Nullable(UInt64), + longtasks_containersrc Nullable(String), + longtasks_containerid Nullable(String), + longtasks_containername Nullable(UInt64), + setnodeurlbasedattribute_id Nullable(UInt64), + setnodeurlbasedattribute_name Nullable(String), + setnodeurlbasedattribute_value Nullable(String), + setnodeurlbasedattribute_baseurl Nullable(String), + setstyledata_id Nullable(UInt64), + setstyledata_data Nullable(String), + setstyledata_baseurl Nullable(String), + issueevent_messageid Nullable(UInt64), + issueevent_timestamp Nullable(UInt64), + issueevent_type Nullable(String), + issueevent_contextstring Nullable(String), + issueevent_context Nullable(String), + issueevent_payload Nullable(String), + technicalinfo_type Nullable(String), + technicalinfo_value Nullable(String), + customissue_name Nullable(String), + customissue_payload Nullable(String), + pageclose Nullable(UInt64), + received_at UInt64, + batch_order_number UInt64 +) ENGINE = Buffer(default, connector_events_detailed, 16, 10, 120, 10000, 1000000, 10000, 100000000); From 0b38f1dedbdea691eb8a3b4977ab7267ee93209e Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 20 Jul 2022 10:55:25 +0200 Subject: [PATCH 11/33] Updated types in BigQuery table --- ee/connectors/bigquery_utils/create_table.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ee/connectors/bigquery_utils/create_table.py b/ee/connectors/bigquery_utils/create_table.py index 5dc4e9b9a..e88c903ec 100644 --- a/ee/connectors/bigquery_utils/create_table.py +++ b/ee/connectors/bigquery_utils/create_table.py @@ -341,20 +341,20 @@ def create_events_detailed_table(creds_file, table_id): bigquery.SchemaField("vuex_state", "STRING"), bigquery.SchemaField("longtasks_timestamp", "INT64"), bigquery.SchemaField("longtasks_duration", "INT64"), - bigquery.SchemaField("longtasks_context", "FLOAT64"), - bigquery.SchemaField("longtasks_containertype", "FLOAT64"), + bigquery.SchemaField("longtasks_context", "INT64"), + bigquery.SchemaField("longtasks_containertype", "INT64"), bigquery.SchemaField("longtasks_containersrc", "STRING"), bigquery.SchemaField("longtasks_containerid", "STRING"), - bigquery.SchemaField("longtasks_containername", "FLOAT64"), - bigquery.SchemaField("setnodeurlbasedattribute_id", "FLOAT64"), + bigquery.SchemaField("longtasks_containername", "INT64"), + bigquery.SchemaField("setnodeurlbasedattribute_id", "INT64"), bigquery.SchemaField("setnodeurlbasedattribute_name", "STRING"), bigquery.SchemaField("setnodeurlbasedattribute_value", "STRING"), bigquery.SchemaField("setnodeurlbasedattribute_baseurl", "STRING"), - bigquery.SchemaField("setstyledata_id", "FLOAT64"), + bigquery.SchemaField("setstyledata_id", "INT64"), bigquery.SchemaField("setstyledata_data", "STRING"), bigquery.SchemaField("setstyledata_baseurl", "STRING"), - bigquery.SchemaField("issueevent_messageid", "FLOAT64"), - bigquery.SchemaField("issueevent_timestamp", "FLOAT64"), + bigquery.SchemaField("issueevent_messageid", "INT64"), + bigquery.SchemaField("issueevent_timestamp", "INT64"), bigquery.SchemaField("issueevent_type", "STRING"), bigquery.SchemaField("issueevent_contextstring", "STRING"), bigquery.SchemaField("issueevent_context", "STRING"), @@ -363,7 +363,7 @@ def create_events_detailed_table(creds_file, table_id): bigquery.SchemaField("technicalinfo_value", "STRING"), bigquery.SchemaField("customissue_name", "STRING"), bigquery.SchemaField("customissue_payload", "STRING"), - bigquery.SchemaField("pageclose", "FLOAT64"), + bigquery.SchemaField("pageclose", "INT64"), bigquery.SchemaField("received_at", "INT64", mode="REQUIRED"), bigquery.SchemaField("batch_order_number", "INT64", mode="REQUIRED") ] From ee80c7c03fd4c049867ba2a0ef557a220a45df15 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Fri, 22 Jul 2022 15:05:42 +0200 Subject: [PATCH 12/33] RedShift operational --- ee/connectors/db/tables.py | 5 + ee/connectors/sql/redshift_events.sql | 4 +- .../sql/redshift_events_detailed.sql | 238 ++++++++++++++++++ ee/connectors/sql/redshift_sessions.sql | 4 +- 4 files changed, 247 insertions(+), 4 deletions(-) create mode 100644 ee/connectors/sql/redshift_events_detailed.sql diff --git a/ee/connectors/db/tables.py b/ee/connectors/db/tables.py index db1c2ca42..58f57514c 100644 --- a/ee/connectors/db/tables.py +++ b/ee/connectors/db/tables.py @@ -74,3 +74,8 @@ def create_tables_redshift(db): q = f.read() db.engine.execute(q) print(f"`connector_sessions` table created succesfully.") + + #with open(base_path / 'sql' / 'redshift_events_detailed.sql') as f: + # q = f.read() + #db.engine.execute(q) + #print(f"`connector_user_events_detailed` table created succesfully.") diff --git a/ee/connectors/sql/redshift_events.sql b/ee/connectors/sql/redshift_events.sql index c310e3202..773444233 100644 --- a/ee/connectors/sql/redshift_events.sql +++ b/ee/connectors/sql/redshift_events.sql @@ -1,4 +1,4 @@ -CREATE TABLE connector_events +CREATE TABLE IF NOT EXISTS connector_events ( sessionid BIGINT, connectioninformation_downlink BIGINT, @@ -49,4 +49,4 @@ CREATE TABLE connector_events customissue_payload VARCHAR(300), received_at BIGINT, batch_order_number BIGINT -); \ No newline at end of file +); diff --git a/ee/connectors/sql/redshift_events_detailed.sql b/ee/connectors/sql/redshift_events_detailed.sql new file mode 100644 index 000000000..f50078828 --- /dev/null +++ b/ee/connectors/sql/redshift_events_detailed.sql @@ -0,0 +1,238 @@ +CREATE TABLE IF NOT EXISTS connector_events_detailed +( + sessionid BIGINT, + clickevent_hesitationtime BIGINT, + clickevent_label VARCHAR(300), + clickevent_messageid BIGINT, + clickevent_timestamp BIGINT, + connectioninformation_downlink BIGINT, + connectioninformation_type VARCHAR(300), + consolelog_level VARCHAR(300), + consolelog_value VARCHAR(300), + cpuissue_duration BIGINT, + cpuissue_rate BIGINT, + cpuissue_timestamp BIGINT, + createdocument BOOLEAN, + createelementnode_id BIGINT, + createelementnode_parentid BIGINT, + cssdeleterule_index BIGINT, + cssdeleterule_stylesheetid BIGINT, + cssinsertrule_index BIGINT, + cssinsertrule_rule VARCHAR(300), + cssinsertrule_stylesheetid BIGINT, + customevent_messageid BIGINT, + customevent_name VARCHAR(300), + customevent_payload VARCHAR(300), + customevent_timestamp BIGINT, + domdrop_timestamp BIGINT, + errorevent_message VARCHAR(300), + errorevent_messageid BIGINT, + errorevent_name VARCHAR(300), + errorevent_payload VARCHAR(300), + errorevent_source VARCHAR(300), + errorevent_timestamp BIGINT, + fetch_duration BIGINT, + fetch_method VARCHAR(300), + fetch_request VARCHAR(300), + fetch_response VARCHAR(300), + fetch_status BIGINT, + fetch_timestamp BIGINT, + fetch_url VARCHAR(300), + graphql_operationkind VARCHAR(300), + graphql_operationname VARCHAR(300), + graphql_response VARCHAR(300), + graphql_variables VARCHAR(300), + graphqlevent_messageid BIGINT, + graphqlevent_name VARCHAR(300), + graphqlevent_timestamp BIGINT, + inputevent_label VARCHAR(300), + inputevent_messageid BIGINT, + inputevent_timestamp BIGINT, + inputevent_value VARCHAR(300), + inputevent_valuemasked BOOLEAN, + jsexception_message VARCHAR(300), + jsexception_name VARCHAR(300), + jsexception_payload VARCHAR(300), + memoryissue_duration BIGINT, + memoryissue_rate BIGINT, + memoryissue_timestamp BIGINT, + metadata_key VARCHAR(300), + metadata_value VARCHAR(300), + mobx_payload VARCHAR(300), + mobx_type VARCHAR(300), + mouseclick_id BIGINT, + mouseclick_hesitationtime BIGINT, + mouseclick_label VARCHAR(300), + mousemove_x BIGINT, + mousemove_y BIGINT, + movenode_id BIGINT, + movenode_index BIGINT, + movenode_parentid BIGINT, + ngrx_action VARCHAR(300), + ngrx_duration BIGINT, + ngrx_state VARCHAR(300), + otable_key VARCHAR(300), + otable_value VARCHAR(300), + pageevent_domcontentloadedeventend BIGINT, + pageevent_domcontentloadedeventstart BIGINT, + pageevent_firstcontentfulpaint BIGINT, + pageevent_firstpaint BIGINT, + pageevent_loaded BOOLEAN, + pageevent_loadeventend BIGINT, + pageevent_loadeventstart BIGINT, + pageevent_messageid BIGINT, + pageevent_referrer VARCHAR(300), + pageevent_requeststart BIGINT, + pageevent_responseend BIGINT, + pageevent_responsestart BIGINT, + pageevent_speedindex BIGINT, + pageevent_timestamp BIGINT, + pageevent_url VARCHAR(300), + pageloadtiming_domcontentloadedeventend BIGINT, + pageloadtiming_domcontentloadedeventstart BIGINT, + pageloadtiming_firstcontentfulpaint BIGINT, + pageloadtiming_firstpaint BIGINT, + pageloadtiming_loadeventend BIGINT, + pageloadtiming_loadeventstart BIGINT, + pageloadtiming_requeststart BIGINT, + pageloadtiming_responseend BIGINT, + pageloadtiming_responsestart BIGINT, + pagerendertiming_speedindex BIGINT, + pagerendertiming_timetointeractive BIGINT, + pagerendertiming_visuallycomplete BIGINT, + performancetrack_frames BIGINT, + performancetrack_ticks BIGINT, + performancetrack_totaljsheapsize BIGINT, + performancetrack_usedjsheapsize BIGINT, + performancetrackaggr_avgcpu BIGINT, + performancetrackaggr_avgfps BIGINT, + performancetrackaggr_avgtotaljsheapsize BIGINT, + performancetrackaggr_avgusedjsheapsize BIGINT, + performancetrackaggr_maxcpu BIGINT, + performancetrackaggr_maxfps BIGINT, + performancetrackaggr_maxtotaljsheapsize BIGINT, + performancetrackaggr_maxusedjsheapsize BIGINT, + performancetrackaggr_mincpu BIGINT, + performancetrackaggr_minfps BIGINT, + performancetrackaggr_mintotaljsheapsize BIGINT, + performancetrackaggr_minusedjsheapsize BIGINT, + performancetrackaggr_timestampend BIGINT, + performancetrackaggr_timestampstart BIGINT, + profiler_args VARCHAR(300), + profiler_duration BIGINT, + profiler_name VARCHAR(300), + profiler_result VARCHAR(300), + rawcustomevent_name VARCHAR(300), + rawcustomevent_payload VARCHAR(300), + rawerrorevent_message VARCHAR(300), + rawerrorevent_name VARCHAR(300), + rawerrorevent_payload VARCHAR(300), + rawerrorevent_source VARCHAR(300), + rawerrorevent_timestamp BIGINT, + redux_action VARCHAR(300), + redux_duration BIGINT, + redux_state VARCHAR(300), + removenode_id BIGINT, + removenodeattribute_id BIGINT, + removenodeattribute_name VARCHAR(300), + resourceevent_decodedbodysize BIGINT, + resourceevent_duration BIGINT, + resourceevent_encodedbodysize BIGINT, + resourceevent_headersize BIGINT, + resourceevent_messageid BIGINT, + resourceevent_method VARCHAR(300), + resourceevent_status BIGINT, + resourceevent_success BOOLEAN, + resourceevent_timestamp BIGINT, + resourceevent_ttfb BIGINT, + resourceevent_type VARCHAR(300), + resourceevent_url VARCHAR(300), + resourcetiming_decodedbodysize BIGINT, + resourcetiming_duration BIGINT, + resourcetiming_encodedbodysize BIGINT, + resourcetiming_headersize BIGINT, + resourcetiming_initiator VARCHAR(300), + resourcetiming_timestamp BIGINT, + resourcetiming_ttfb BIGINT, + resourcetiming_url VARCHAR(300), + sessiondisconnect BOOLEAN, + sessiondisconnect_timestamp BIGINT, + sessionend BOOLEAN, + sessionend_timestamp BIGINT, + sessionstart_projectid BIGINT, + sessionstart_revid VARCHAR(300), + sessionstart_timestamp BIGINT, + sessionstart_trackerversion VARCHAR(300), + sessionstart_useragent VARCHAR(300), + sessionstart_userbrowser VARCHAR(300), + sessionstart_userbrowserversion VARCHAR(300), + sessionstart_usercountry VARCHAR(300), + sessionstart_userdevice VARCHAR(300), + sessionstart_userdeviceheapsize BIGINT, + sessionstart_userdevicememorysize BIGINT, + sessionstart_userdevicetype VARCHAR(300), + sessionstart_useros VARCHAR(300), + sessionstart_userosversion VARCHAR(300), + sessionstart_useruuid VARCHAR(300), + setcssdata_data BIGINT, + setcssdata_id BIGINT, + setinputchecked_checked BIGINT, + setinputchecked_id BIGINT, + setinputtarget_id BIGINT, + setinputtarget_label BIGINT, + setinputvalue_id BIGINT, + setinputvalue_mask BIGINT, + setinputvalue_value BIGINT, + setnodeattribute_id BIGINT, + setnodeattribute_name BIGINT, + setnodeattribute_value BIGINT, + setnodedata_data BIGINT, + setnodedata_id BIGINT, + setnodescroll_id BIGINT, + setnodescroll_x BIGINT, + setnodescroll_y BIGINT, + setpagelocation_navigationstart BIGINT, + setpagelocation_referrer VARCHAR(300), + setpagelocation_url VARCHAR(300), + setpagevisibility_hidden BOOLEAN, + setviewportscroll_x BIGINT, + setviewportscroll_y BIGINT, + setviewportsize_height BIGINT, + setviewportsize_width BIGINT, + stateaction_type VARCHAR(300), + stateactionevent_messageid BIGINT, + stateactionevent_timestamp BIGINT, + stateactionevent_type VARCHAR(300), + timestamp_timestamp BIGINT, + useranonymousid_id VARCHAR(300), + userid_id VARCHAR(300), + vuex_mutation VARCHAR(300), + vuex_state VARCHAR(300), + longtasks_timestamp BIGINT, + longtasks_duration BIGINT, + longtasks_context BIGINT, + longtasks_containertype BIGINT, + longtasks_containersrc VARCHAR(300), + longtasks_containerid VARCHAR(300), + longtasks_containername BIGINT, + setnodeurlbasedattribute_id BIGINT, + setnodeurlbasedattribute_name VARCHAR(300), + setnodeurlbasedattribute_value VARCHAR(300), + setnodeurlbasedattribute_baseurl VARCHAR(300), + setstyledata_id BIGINT, + setstyledata_data VARCHAR(300), + setstyledata_baseurl VARCHAR(300), + issueevent_messageid BIGINT, + issueevent_timestamp BIGINT, + issueevent_type VARCHAR(300), + issueevent_contextstring VARCHAR(300), + issueevent_context VARCHAR(300), + issueevent_payload VARCHAR(300), + technicalinfo_type VARCHAR(300), + technicalinfo_value VARCHAR(300), + customissue_name VARCHAR(300), + customissue_payload VARCHAR(300), + pageclose BIGINT, + received_at BIGINT, + batch_order_number BIGINT +); diff --git a/ee/connectors/sql/redshift_sessions.sql b/ee/connectors/sql/redshift_sessions.sql index f1750dcc2..ba1887ac1 100644 --- a/ee/connectors/sql/redshift_sessions.sql +++ b/ee/connectors/sql/redshift_sessions.sql @@ -1,4 +1,4 @@ -CREATE TABLE connector_user_sessions +CREATE TABLE IF NOT EXISTS connector_user_sessions ( -- SESSION METADATA sessionid bigint, @@ -47,4 +47,4 @@ CREATE TABLE connector_user_sessions issues VARCHAR, urls_count bigint, urls VARCHAR -); \ No newline at end of file +); From 4b11c77bcf063f45883d2cf8d02dd422f5478e4c Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 27 Jul 2022 13:39:04 +0200 Subject: [PATCH 13/33] quickwit server taking inputs from Kafka quickwit-kafka topic and saving data in s3 quickwit bucket --- clean.sh | 2 + create_kafka_index.sh | 1 + create_source.sh | 1 + index-config.yaml | 21 + kafka-source.yaml | 9 + kafka_sample.py | 119 ++++++ msgcodec/codec.py | 819 +++++++++++++++++++++++++++++++++++++ msgcodec/messages.py | 919 ++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 + run_quickwit.sh | 1 + s3-config-listen.yaml | 5 + s3-config.yaml | 4 + 12 files changed, 1902 insertions(+) create mode 100644 clean.sh create mode 100644 create_kafka_index.sh create mode 100644 create_source.sh create mode 100644 index-config.yaml create mode 100644 kafka-source.yaml create mode 100644 kafka_sample.py create mode 100644 msgcodec/codec.py create mode 100644 msgcodec/messages.py create mode 100644 requirements.txt create mode 100644 run_quickwit.sh create mode 100644 s3-config-listen.yaml create mode 100644 s3-config.yaml diff --git a/clean.sh b/clean.sh new file mode 100644 index 000000000..81469034c --- /dev/null +++ b/clean.sh @@ -0,0 +1,2 @@ +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/qwdata:/quickwit/qwdata -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml quickwit/quickwit source delete --index quickwit-kafka --source kafka-source --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml $(pwd)/qwdata:/quickwit/qwdata quickwit/quickwit index delete --index quickwit-kafka --config s3-config.yaml diff --git a/create_kafka_index.sh b/create_kafka_index.sh new file mode 100644 index 000000000..651b3983c --- /dev/null +++ b/create_kafka_index.sh @@ -0,0 +1 @@ +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/qwdata:/quickwit/qwdata -v $(pwd)/index-config.yaml:/quickwit/index-config.yaml -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit index create --index-config index-config.yaml --config s3-config.yaml diff --git a/create_source.sh b/create_source.sh new file mode 100644 index 000000000..b1c37c020 --- /dev/null +++ b/create_source.sh @@ -0,0 +1 @@ +docker run -v /etc/hosts:/etc/hosts:ro -v /etc/hosts:/etc/hosts:ro -v $(pwd)/qwdata:/quickwit/qwdata -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -v $(pwd)/kafka-source.yaml:/quickwit/kafka-source.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit source create --index quickwit-kafka --source-config kafka-source.yaml --config s3-config.yaml diff --git a/index-config.yaml b/index-config.yaml new file mode 100644 index 000000000..43f8d3324 --- /dev/null +++ b/index-config.yaml @@ -0,0 +1,21 @@ +# +# Index config file for gh-archive dataset. +# + +version: 0 + +index_id: quickwit-kafka + +doc_mapping: + field_mappings: + - name: title + type: text + tokenizer: default + record: position + - name: body + type: text + tokenizer: default + record: position + +search_settings: + default_search_fields: [title, body] diff --git a/kafka-source.yaml b/kafka-source.yaml new file mode 100644 index 000000000..89b6174cf --- /dev/null +++ b/kafka-source.yaml @@ -0,0 +1,9 @@ +# +# Kafka source config file. +# +source_id: kafka-source +source_type: kafka +params: + topic: quickwit-kafka + client_params: + bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 diff --git a/kafka_sample.py b/kafka_sample.py new file mode 100644 index 000000000..9d58ae54d --- /dev/null +++ b/kafka_sample.py @@ -0,0 +1,119 @@ +import os +from time import sleep +from kafka import KafkaConsumer, KafkaProducer +from datetime import datetime +from collections import defaultdict + +from msgcodec.codec import MessageCodec +from msgcodec.messages import SessionEnd, Fetch, FetchEvent, PageEvent, SetCSSData, SetStyleData +import json + +import getopt, sys + +n = 0 + +def transform(data): + global n + n += 1 + return {'title': f'message {n}', 'body': data} + +def create_producer(): + producer = KafkaProducer(#security_protocol="SSL", + bootstrap_servers=os.environ['KAFKA_SERVER_2'], + # os.environ['KAFKA_SERVER_1']], + #ssl_cafile="./ca.pem", + #ssl_certfile="./service.cert", + #ssl_keyfile="./service.key", + value_serializer=lambda v: json.dumps(v).encode('ascii') + ) + return producer + +def create_consumer(): + consumer = KafkaConsumer(#security_protocol="SSL", + bootstrap_servers=os.environ['KAFKA_SERVER_2'], + # os.environ['KAFKA_SERVER_1']], + group_id=f"my_test52_connector", + auto_offset_reset="earliest", + enable_auto_commit=False + ) + return consumer + + +def consumer_producer_end(): + global n + batch_size = 4000 + sessions_batch_size = 400 + batch = [] + sessions = defaultdict(lambda: None) + sessions_batch = [] + + codec = MessageCodec() + consumer = create_consumer() + producer = create_producer() + + consumer.subscribe(topics=["raw", "raw_ios"]) + print("Kafka consumer subscribed") + escape = 0 + for msg in consumer: + messages = codec.decode_detailed(msg.value) + session_id = codec.decode_key(msg.key) + if messages is None: + print('-') + for message in messages: + send = True + if isinstance(message, Fetch): + data = message.response + elif isinstance(message, FetchEvent): + data = message.response + elif isinstance(message, PageEvent): + print(message.url) + elif isinstance(message, SetCSSData): + data = message.data + elif isinstance(message, SetStyleData): + data = message.data + else: + send = False + continue + if send: + producer.send('quickwit-kafka', value=transform(data)) + print(f'added message {n}') + sleep(5) + + + +def consumer_end(): + consumer = create_consumer() + consumer.subscribe(topics=['quickwit-kafka']) + for msg in consumer: + print(msg) + + +def handle_args(): + arguments = len(sys.argv)-1 + argument_list = sys.argv[1:] + pos = 1 + short_options = 'hm:' + long_options = ['help', 'method='] + try: + arguments, values = getopt.getopt(argument_list, short_options, long_options) + except getopt.error as err: + print(str(err)) + sys.exit(2) + + for arg, argv in arguments: + if arg in ('-h', '--help'): + print(""" Methods +--method, -m available methods: consumer, producer +--help, -h show help + """) + elif arg in ('-m', '--method'): + if argv == 'consumer': + consumer_end() + elif argv == 'producer': + consumer_producer_end() + else: + print('Method not found. Available methods: consumer, producer') + + +if __name__ == '__main__': + handle_args() diff --git a/msgcodec/codec.py b/msgcodec/codec.py new file mode 100644 index 000000000..7e1aabbf9 --- /dev/null +++ b/msgcodec/codec.py @@ -0,0 +1,819 @@ +import io +from typing import List +from msgcodec.messages import * + + +class Codec: + """ + Implements encode/decode primitives + """ + + @staticmethod + def read_boolean(reader: io.BytesIO): + b = reader.read(1) + return b == 1 + + @staticmethod + def read_uint(reader: io.BytesIO): + """ + The ending "big" doesn't play any role here, + since we're dealing with data per one byte + """ + x = 0 # the result + s = 0 # the shift (our result is big-ending) + i = 0 # n of byte (max 9 for uint64) + while True: + b = reader.read(1) + if len(b) == 0: + raise IndexError('bytes out of range') + num = int.from_bytes(b, "big", signed=False) + # print(i, x) + + if num < 0x80: + if i > 9 | i == 9 & num > 1: + raise OverflowError() + return int(x | num << s) + x |= (num & 0x7f) << s + s += 7 + i += 1 + + @staticmethod + def read_int(reader: io.BytesIO) -> int: + """ + ux, err := ReadUint(reader) + x := int64(ux >> 1) + if err != nil { + return x, err + } + if ux&1 != 0 { + x = ^x + } + return x, err + """ + ux = Codec.read_uint(reader) + x = int(ux >> 1) + + if ux & 1 != 0: + x = - x - 1 + return x + + @staticmethod + def read_string(reader: io.BytesIO) -> str: + length = Codec.read_uint(reader) + s = reader.read(length) + try: + return s.decode("utf-8", errors="replace").replace("\x00", "\uFFFD") + except UnicodeDecodeError: + return None + + +class MessageCodec(Codec): + + def encode(self, m: Message) -> bytes: + ... + + def decode(self, b: bytes) -> Message: + reader = io.BytesIO(b) + return self.read_head_message(reader) + + def decode_detailed(self, b: bytes) -> List[Message]: + reader = io.BytesIO(b) + messages_list = list() + while True: + try: + messages_list.append(self.read_head_message(reader)) + except IndexError: + break + return messages_list + + def read_head_message(self, reader: io.BytesIO) -> Message: + message_id = self.read_message_id(reader) + if message_id == 0: + return Timestamp( + timestamp=self.read_uint(reader) + ) + if message_id == 1: + return SessionStart( + timestamp=self.read_uint(reader), + project_id=self.read_uint(reader), + tracker_version=self.read_string(reader), + rev_id=self.read_string(reader), + user_uuid=self.read_string(reader), + user_agent=self.read_string(reader), + user_os=self.read_string(reader), + user_os_version=self.read_string(reader), + user_browser=self.read_string(reader), + user_browser_version=self.read_string(reader), + user_device=self.read_string(reader), + user_device_type=self.read_string(reader), + user_device_memory_size=self.read_uint(reader), + user_device_heap_size=self.read_uint(reader), + user_country=self.read_string(reader) + ) + + if message_id == 2: + return SessionDisconnect( + timestamp=self.read_uint(reader) + ) + + if message_id == 3: + return SessionEnd( + timestamp=self.read_uint(reader) + ) + + if message_id == 4: + return SetPageLocation( + url=self.read_string(reader), + referrer=self.read_string(reader), + navigation_start=self.read_uint(reader) + ) + + if message_id == 5: + return SetViewportSize( + width=self.read_uint(reader), + height=self.read_uint(reader) + ) + + if message_id == 6: + return SetViewportScroll( + x=self.read_int(reader), + y=self.read_int(reader) + ) + + if message_id == 7: + return CreateDocument() + + if message_id == 8: + return CreateElementNode( + id=self.read_uint(reader), + parent_id=self.read_uint(reader), + index=self.read_uint(reader), + tag=self.read_string(reader), + svg=self.read_boolean(reader), + ) + + if message_id == 9: + return CreateTextNode( + id=self.read_uint(reader), + parent_id=self.read_uint(reader), + index=self.read_uint(reader) + ) + + if message_id == 10: + return MoveNode( + id=self.read_uint(reader), + parent_id=self.read_uint(reader), + index=self.read_uint(reader) + ) + + if message_id == 11: + return RemoveNode( + id=self.read_uint(reader) + ) + + if message_id == 12: + return SetNodeAttribute( + id=self.read_uint(reader), + name=self.read_string(reader), + value=self.read_string(reader) + ) + + if message_id == 13: + return RemoveNodeAttribute( + id=self.read_uint(reader), + name=self.read_string(reader) + ) + + if message_id == 14: + return SetNodeData( + id=self.read_uint(reader), + data=self.read_string(reader) + ) + + if message_id == 15: + return SetCSSData( + id=self.read_uint(reader), + data=self.read_string(reader) + ) + + if message_id == 16: + return SetNodeScroll( + id=self.read_uint(reader), + x=self.read_int(reader), + y=self.read_int(reader), + ) + + if message_id == 17: + return SetInputTarget( + id=self.read_uint(reader), + label=self.read_string(reader) + ) + + if message_id == 18: + return SetInputValue( + id=self.read_uint(reader), + value=self.read_string(reader), + mask=self.read_int(reader), + ) + + if message_id == 19: + return SetInputChecked( + id=self.read_uint(reader), + checked=self.read_boolean(reader) + ) + + if message_id == 20: + return MouseMove( + x=self.read_uint(reader), + y=self.read_uint(reader) + ) + + if message_id == 21: + return MouseClickDepricated( + id=self.read_uint(reader), + hesitation_time=self.read_uint(reader), + label=self.read_string(reader) + ) + + if message_id == 22: + return ConsoleLog( + level=self.read_string(reader), + value=self.read_string(reader) + ) + + if message_id == 23: + return PageLoadTiming( + request_start=self.read_uint(reader), + response_start=self.read_uint(reader), + response_end=self.read_uint(reader), + dom_content_loaded_event_start=self.read_uint(reader), + dom_content_loaded_event_end=self.read_uint(reader), + load_event_start=self.read_uint(reader), + load_event_end=self.read_uint(reader), + first_paint=self.read_uint(reader), + first_contentful_paint=self.read_uint(reader) + ) + + if message_id == 24: + return PageRenderTiming( + speed_index=self.read_uint(reader), + visually_complete=self.read_uint(reader), + time_to_interactive=self.read_uint(reader), + ) + + if message_id == 25: + return JSException( + name=self.read_string(reader), + message=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 26: + return RawErrorEvent( + timestamp=self.read_uint(reader), + source=self.read_string(reader), + name=self.read_string(reader), + message=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 27: + return RawCustomEvent( + name=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 28: + return UserID( + id=self.read_string(reader) + ) + + if message_id == 29: + return UserAnonymousID( + id=self.read_string(reader) + ) + + if message_id == 30: + return Metadata( + key=self.read_string(reader), + value=self.read_string(reader) + ) + + if message_id == 31: + return PageEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + url=self.read_string(reader), + referrer=self.read_string(reader), + loaded=self.read_boolean(reader), + request_start=self.read_uint(reader), + response_start=self.read_uint(reader), + response_end=self.read_uint(reader), + dom_content_loaded_event_start=self.read_uint(reader), + dom_content_loaded_event_end=self.read_uint(reader), + load_event_start=self.read_uint(reader), + load_event_end=self.read_uint(reader), + first_paint=self.read_uint(reader), + first_contentful_paint=self.read_uint(reader), + speed_index=self.read_uint(reader), + visually_complete=self.read_uint(reader), + time_to_interactive=self.read_uint(reader) + ) + + if message_id == 32: + return InputEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + value=self.read_string(reader), + value_masked=self.read_boolean(reader), + label=self.read_string(reader), + ) + + if message_id == 33: + return ClickEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + hesitation_time=self.read_uint(reader), + label=self.read_string(reader) + ) + + if message_id == 34: + return ErrorEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + source=self.read_string(reader), + name=self.read_string(reader), + message=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 35: + + message_id = self.read_uint(reader) + ts = self.read_uint(reader) + if ts > 9999999999999: + ts = None + return ResourceEvent( + message_id=message_id, + timestamp=ts, + duration=self.read_uint(reader), + ttfb=self.read_uint(reader), + header_size=self.read_uint(reader), + encoded_body_size=self.read_uint(reader), + decoded_body_size=self.read_uint(reader), + url=self.read_string(reader), + type=self.read_string(reader), + success=self.read_boolean(reader), + method=self.read_string(reader), + status=self.read_uint(reader) + ) + + if message_id == 36: + return CustomEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + name=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 37: + return CSSInsertRule( + id=self.read_uint(reader), + rule=self.read_string(reader), + index=self.read_uint(reader) + ) + + if message_id == 38: + return CSSDeleteRule( + id=self.read_uint(reader), + index=self.read_uint(reader) + ) + + if message_id == 39: + return Fetch( + method=self.read_string(reader), + url=self.read_string(reader), + request=self.read_string(reader), + response=self.read_string(reader), + status=self.read_uint(reader), + timestamp=self.read_uint(reader), + duration=self.read_uint(reader) + ) + + if message_id == 40: + return Profiler( + name=self.read_string(reader), + duration=self.read_uint(reader), + args=self.read_string(reader), + result=self.read_string(reader) + ) + + if message_id == 41: + return OTable( + key=self.read_string(reader), + value=self.read_string(reader) + ) + + if message_id == 42: + return StateAction( + type=self.read_string(reader) + ) + + if message_id == 43: + return StateActionEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + type=self.read_string(reader) + ) + + if message_id == 44: + return Redux( + action=self.read_string(reader), + state=self.read_string(reader), + duration=self.read_uint(reader) + ) + + if message_id == 45: + return Vuex( + mutation=self.read_string(reader), + state=self.read_string(reader), + ) + + if message_id == 46: + return MobX( + type=self.read_string(reader), + payload=self.read_string(reader), + ) + + if message_id == 47: + return NgRx( + action=self.read_string(reader), + state=self.read_string(reader), + duration=self.read_uint(reader) + ) + + if message_id == 48: + return GraphQL( + operation_kind=self.read_string(reader), + operation_name=self.read_string(reader), + variables=self.read_string(reader), + response=self.read_string(reader) + ) + + if message_id == 49: + return PerformanceTrack( + frames=self.read_int(reader), + ticks=self.read_int(reader), + total_js_heap_size=self.read_uint(reader), + used_js_heap_size=self.read_uint(reader) + ) + + if message_id == 50: + return GraphQLEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + name=self.read_string(reader) + ) + + if message_id == 51: + return FetchEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + method=self.read_string(reader), + url=self.read_string(reader), + request=self.read_string(reader), + response=self.read_string(reader), + status=self.read_uint(reader), + duration=self.read_uint(reader) + ) + + if message_id == 52: + return DomDrop( + timestamp=self.read_uint(reader) + ) + + if message_id == 53: + return ResourceTiming( + timestamp=self.read_uint(reader), + duration=self.read_uint(reader), + ttfb=self.read_uint(reader), + header_size=self.read_uint(reader), + encoded_body_size=self.read_uint(reader), + decoded_body_size=self.read_uint(reader), + url=self.read_string(reader), + initiator=self.read_string(reader) + ) + + if message_id == 54: + return ConnectionInformation( + downlink=self.read_uint(reader), + type=self.read_string(reader) + ) + + if message_id == 55: + return SetPageVisibility( + hidden=self.read_boolean(reader) + ) + + if message_id == 56: + return PerformanceTrackAggr( + timestamp_start=self.read_uint(reader), + timestamp_end=self.read_uint(reader), + min_fps=self.read_uint(reader), + avg_fps=self.read_uint(reader), + max_fps=self.read_uint(reader), + min_cpu=self.read_uint(reader), + avg_cpu=self.read_uint(reader), + max_cpu=self.read_uint(reader), + min_total_js_heap_size=self.read_uint(reader), + avg_total_js_heap_size=self.read_uint(reader), + max_total_js_heap_size=self.read_uint(reader), + min_used_js_heap_size=self.read_uint(reader), + avg_used_js_heap_size=self.read_uint(reader), + max_used_js_heap_size=self.read_uint(reader) + ) + + if message_id == 59: + return LongTask( + timestamp=self.read_uint(reader), + duration=self.read_uint(reader), + context=self.read_uint(reader), + container_type=self.read_uint(reader), + container_src=self.read_string(reader), + container_id=self.read_string(reader), + container_name=self.read_string(reader) + ) + + if message_id == 60: + return SetNodeURLBasedAttribute( + id=self.read_uint(reader), + name=self.read_string(reader), + value=self.read_string(reader), + base_url=self.read_string(reader) + ) + + if message_id == 61: + return SetStyleData( + id=self.read_uint(reader), + data=self.read_string(reader), + base_url=self.read_string(reader) + ) + + if message_id == 62: + return IssueEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + type=self.read_string(reader), + context_string=self.read_string(reader), + context=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 63: + return TechnicalInfo( + type=self.read_string(reader), + value=self.read_string(reader) + ) + + if message_id == 64: + return CustomIssue( + name=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 65: + return PageClose() + + if message_id == 66: + return AssetCache( + url=self.read_string(reader) + ) + + if message_id == 67: + return CSSInsertRuleURLBased( + id=self.read_uint(reader), + rule=self.read_string(reader), + index=self.read_uint(reader), + base_url=self.read_string(reader) + ) + + if message_id == 69: + return MouseClick( + id=self.read_uint(reader), + hesitation_time=self.read_uint(reader), + label=self.read_string(reader), + selector=self.read_string(reader) + ) + + if message_id == 70: + return CreateIFrameDocument( + frame_id=self.read_uint(reader), + id=self.read_uint(reader) + ) + + if message_id == 80: + return BatchMeta( + page_no=self.read_uint(reader), + first_index=self.read_uint(reader), + timestamp=self.read_int(reader) + ) + + if message_id == 90: + return IOSSessionStart( + timestamp=self.read_uint(reader), + project_id=self.read_uint(reader), + tracker_version=self.read_string(reader), + rev_id=self.read_string(reader), + user_uuid=self.read_string(reader), + user_os=self.read_string(reader), + user_os_version=self.read_string(reader), + user_device=self.read_string(reader), + user_device_type=self.read_string(reader), + user_country=self.read_string(reader) + ) + + if message_id == 91: + return IOSSessionEnd( + timestamp=self.read_uint(reader) + ) + + if message_id == 92: + return IOSMetadata( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + key=self.read_string(reader), + value=self.read_string(reader) + ) + + if message_id == 93: + return IOSCustomEvent( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + name=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 94: + return IOSUserID( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + value=self.read_string(reader) + ) + + if message_id == 95: + return IOSUserAnonymousID( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + value=self.read_string(reader) + ) + + if message_id == 96: + return IOSScreenChanges( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + x=self.read_uint(reader), + y=self.read_uint(reader), + width=self.read_uint(reader), + height=self.read_uint(reader) + ) + + if message_id == 97: + return IOSCrash( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + name=self.read_string(reader), + reason=self.read_string(reader), + stacktrace=self.read_string(reader) + ) + + if message_id == 98: + return IOSScreenEnter( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + title=self.read_string(reader), + view_name=self.read_string(reader) + ) + + if message_id == 99: + return IOSScreenLeave( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + title=self.read_string(reader), + view_name=self.read_string(reader) + ) + + if message_id == 100: + return IOSClickEvent( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + label=self.read_string(reader), + x=self.read_uint(reader), + y=self.read_uint(reader) + ) + + if message_id == 101: + return IOSInputEvent( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + value=self.read_string(reader), + value_masked=self.read_boolean(reader), + label=self.read_string(reader) + ) + + if message_id == 102: + return IOSPreformanceEvent( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + name=self.read_string(reader), + value=self.read_uint(reader) + ) + + if message_id == 103: + return IOSLog( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + severity=self.read_string(reader), + content=self.read_string(reader) + ) + + if message_id == 104: + return IOSInternalError( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + content=self.read_string(reader) + ) + + if message_id == 105: + return IOSNetworkCall( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + duration=self.read_uint(reader), + headers=self.read_string(reader), + body=self.read_string(reader), + url=self.read_string(reader), + success=self.read_boolean(reader), + method=self.read_string(reader), + status=self.read_uint(reader) + ) + + if message_id == 107: + return IOSBatchMeta( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + first_index=self.read_uint(reader) + ) + + if message_id == 110: + return IOSPerformanceAggregated( + timestamp_start=self.read_uint(reader), + timestamp_end=self.read_uint(reader), + min_fps=self.read_uint(reader), + avg_fps=self.read_uint(reader), + max_fps=self.read_uint(reader), + min_cpu=self.read_uint(reader), + avg_cpu=self.read_uint(reader), + max_cpu=self.read_uint(reader), + min_memory=self.read_uint(reader), + avg_memory=self.read_uint(reader), + max_memory=self.read_uint(reader), + min_battery=self.read_uint(reader), + avg_battery=self.read_uint(reader), + max_battery=self.read_uint(reader) + ) + if message_id == 111: + return IOSIssueEvent( + timestamp=self.read_uint(reader), + type=self.read_string(reader), + context_string=self.read_string(reader), + context=self.read_string(reader), + payload=self.read_string(reader) + ) + + def read_message_id(self, reader: io.BytesIO) -> int: + """ + Read and return the first byte where the message id is encoded + """ + id_ = self.read_uint(reader) + return id_ + + @staticmethod + def check_message_id(b: bytes) -> int: + """ + todo: make it static and without reader. It's just the first byte + Read and return the first byte where the message id is encoded + """ + reader = io.BytesIO(b) + id_ = Codec.read_uint(reader) + + return id_ + + @staticmethod + def decode_key(b) -> int: + """ + Decode the message key (encoded with little endian) + """ + try: + decoded = int.from_bytes(b, "little", signed=False) + except Exception as e: + raise UnicodeDecodeError(f"Error while decoding message key (SessionID) from {b}\n{e}") + return decoded diff --git a/msgcodec/messages.py b/msgcodec/messages.py new file mode 100644 index 000000000..bc451b287 --- /dev/null +++ b/msgcodec/messages.py @@ -0,0 +1,919 @@ +""" +Representations of Kafka messages +""" +from abc import ABC + + +class Message(ABC): + pass + + +class Timestamp(Message): + __id__ = 0 + + def __init__(self, timestamp): + self.timestamp = timestamp + + +class SessionStart(Message): + __id__ = 1 + + def __init__(self, timestamp, project_id, tracker_version, rev_id, user_uuid, + user_agent, user_os, user_os_version, user_browser, user_browser_version, + user_device, user_device_type, user_device_memory_size, user_device_heap_size, + user_country): + self.timestamp = timestamp + self.project_id = project_id + self.tracker_version = tracker_version + self.rev_id = rev_id + self.user_uuid = user_uuid + self.user_agent = user_agent + self.user_os = user_os + self.user_os_version = user_os_version + self.user_browser = user_browser + self.user_browser_version = user_browser_version + self.user_device = user_device + self.user_device_type = user_device_type + self.user_device_memory_size = user_device_memory_size + self.user_device_heap_size = user_device_heap_size + self.user_country = user_country + + +class SessionDisconnect(Message): + __id__ = 2 + + def __init__(self, timestamp): + self.timestamp = timestamp + + +class SessionEnd(Message): + __id__ = 3 + __name__ = 'SessionEnd' + + def __init__(self, timestamp): + self.timestamp = timestamp + + +class SetPageLocation(Message): + __id__ = 4 + + def __init__(self, url, referrer, navigation_start): + self.url = url + self.referrer = referrer + self.navigation_start = navigation_start + + +class SetViewportSize(Message): + __id__ = 5 + + def __init__(self, width, height): + self.width = width + self.height = height + + +class SetViewportScroll(Message): + __id__ = 6 + + def __init__(self, x, y): + self.x = x + self.y = y + + +class CreateDocument(Message): + __id__ = 7 + + +class CreateElementNode(Message): + __id__ = 8 + + def __init__(self, id, parent_id, index, tag, svg): + self.id = id + self.parent_id = parent_id, + self.index = index + self.tag = tag + self.svg = svg + + +class CreateTextNode(Message): + __id__ = 9 + + def __init__(self, id, parent_id, index): + self.id = id + self.parent_id = parent_id + self.index = index + + +class MoveNode(Message): + __id__ = 10 + + def __init__(self, id, parent_id, index): + self.id = id + self.parent_id = parent_id + self.index = index + + +class RemoveNode(Message): + __id__ = 11 + + def __init__(self, id): + self.id = id + + +class SetNodeAttribute(Message): + __id__ = 12 + + def __init__(self, id, name: str, value: str): + self.id = id + self.name = name + self.value = value + + +class RemoveNodeAttribute(Message): + __id__ = 13 + + def __init__(self, id, name: str): + self.id = id + self.name = name + + +class SetNodeData(Message): + __id__ = 14 + + def __init__(self, id, data: str): + self.id = id + self.data = data + + +class SetCSSData(Message): + __id__ = 15 + + def __init__(self, id, data: str): + self.id = id + self.data = data + + +class SetNodeScroll(Message): + __id__ = 16 + + def __init__(self, id, x: int, y: int): + self.id = id + self.x = x + self.y = y + + +class SetInputTarget(Message): + __id__ = 17 + + def __init__(self, id, label: str): + self.id = id + self.label = label + + +class SetInputValue(Message): + __id__ = 18 + + def __init__(self, id, value: str, mask: int): + self.id = id + self.value = value + self.mask = mask + + +class SetInputChecked(Message): + __id__ = 19 + + def __init__(self, id, checked: bool): + self.id = id + self.checked = checked + + +class MouseMove(Message): + __id__ = 20 + + def __init__(self, x, y): + self.x = x + self.y = y + + +class MouseClickDepricated(Message): + __id__ = 21 + + def __init__(self, id, hesitation_time, label: str): + self.id = id + self.hesitation_time = hesitation_time + self.label = label + + +class ConsoleLog(Message): + __id__ = 22 + + def __init__(self, level: str, value: str): + self.level = level + self.value = value + + +class PageLoadTiming(Message): + __id__ = 23 + + def __init__(self, request_start, response_start, response_end, dom_content_loaded_event_start, + dom_content_loaded_event_end, load_event_start, load_event_end, + first_paint, first_contentful_paint): + self.request_start = request_start + self.response_start = response_start + self.response_end = response_end + self.dom_content_loaded_event_start = dom_content_loaded_event_start + self.dom_content_loaded_event_end = dom_content_loaded_event_end + self.load_event_start = load_event_start + self.load_event_end = load_event_end + self.first_paint = first_paint + self.first_contentful_paint = first_contentful_paint + + +class PageRenderTiming(Message): + __id__ = 24 + + def __init__(self, speed_index, visually_complete, time_to_interactive): + self.speed_index = speed_index + self.visually_complete = visually_complete + self.time_to_interactive = time_to_interactive + +class JSException(Message): + __id__ = 25 + + def __init__(self, name: str, message: str, payload: str): + self.name = name + self.message = message + self.payload = payload + + +class RawErrorEvent(Message): + __id__ = 26 + + def __init__(self, timestamp, source: str, name: str, message: str, + payload: str): + self.timestamp = timestamp + self.source = source + self.name = name + self.message = message + self.payload = payload + + +class RawCustomEvent(Message): + __id__ = 27 + + def __init__(self, name: str, payload: str): + self.name = name + self.payload = payload + + +class UserID(Message): + __id__ = 28 + + def __init__(self, id: str): + self.id = id + + +class UserAnonymousID(Message): + __id__ = 29 + + def __init__(self, id: str): + self.id = id + + +class Metadata(Message): + __id__ = 30 + + def __init__(self, key: str, value: str): + self.key = key + self.value = value + + +class PerformanceTrack(Message): + __id__ = 49 + + def __init__(self, frames: int, ticks: int, total_js_heap_size, + used_js_heap_size): + self.frames = frames + self.ticks = ticks + self.total_js_heap_size = total_js_heap_size + self.used_js_heap_size = used_js_heap_size + + +class PageEvent(Message): + __id__ = 31 + + def __init__(self, message_id, timestamp, url: str, referrer: str, + loaded: bool, request_start, response_start, response_end, + dom_content_loaded_event_start, dom_content_loaded_event_end, + load_event_start, load_event_end, first_paint, first_contentful_paint, + speed_index, visually_complete, time_to_interactive): + self.message_id = message_id + self.timestamp = timestamp + self.url = url + self.referrer = referrer + self.loaded = loaded + self.request_start = request_start + self.response_start = response_start + self.response_end = response_end + self.dom_content_loaded_event_start = dom_content_loaded_event_start + self.dom_content_loaded_event_end = dom_content_loaded_event_end + self.load_event_start = load_event_start + self.load_event_end = load_event_end + self.first_paint = first_paint + self.first_contentful_paint = first_contentful_paint + self.speed_index = speed_index + self.visually_complete = visually_complete + self.time_to_interactive = time_to_interactive + + +class InputEvent(Message): + __id__ = 32 + + def __init__(self, message_id, timestamp, value: str, value_masked: bool, label: str): + self.message_id = message_id + self.timestamp = timestamp + self.value = value + self.value_masked = value_masked + self.label = label + + +class ClickEvent(Message): + __id__ = 33 + + def __init__(self, message_id, timestamp, hesitation_time, label: str): + self.message_id = message_id + self.timestamp = timestamp + self.hesitation_time = hesitation_time + self.label = label + + +class ErrorEvent(Message): + __id__ = 34 + + def __init__(self, message_id, timestamp, source: str, name: str, message: str, + payload: str): + self.message_id = message_id + self.timestamp = timestamp + self.source = source + self.name = name + self.message = message + self.payload = payload + + +class ResourceEvent(Message): + __id__ = 35 + + def __init__(self, message_id, timestamp, duration, ttfb, header_size, encoded_body_size, + decoded_body_size, url: str, type: str, success: bool, method: str, status): + self.message_id = message_id + self.timestamp = timestamp + self.duration = duration + self.ttfb = ttfb + self.header_size = header_size + self.encoded_body_size = encoded_body_size + self.decoded_body_size = decoded_body_size + self.url = url + self.type = type + self.success = success + self.method = method + self.status = status + + +class CustomEvent(Message): + __id__ = 36 + + def __init__(self, message_id, timestamp, name: str, payload: str): + self.message_id = message_id + self.timestamp = timestamp + self.name = name + self.payload = payload + + +class CSSInsertRule(Message): + __id__ = 37 + + def __init__(self, id, rule: str, index): + self.id = id + self.rule = rule + self.index = index + + +class CSSDeleteRule(Message): + __id__ = 38 + + def __init__(self, id, index): + self.id = id + self.index = index + + +class Fetch(Message): + __id__ = 39 + + def __init__(self, method: str, url: str, request: str, response: str, status, + timestamp, duration): + self.method = method + self.url = url + self.request = request + self.response = response + self.status = status + self.timestamp = timestamp + self.duration = duration + + +class Profiler(Message): + __id__ = 40 + + def __init__(self, name: str, duration, args: str, result: str): + self.name = name + self.duration = duration + self.args = args + self.result = result + + +class OTable(Message): + __id__ = 41 + + def __init__(self, key: str, value: str): + self.key = key + self.value = value + + +class StateAction(Message): + __id__ = 42 + + def __init__(self, type: str): + self.type = type + + +class StateActionEvent(Message): + __id__ = 43 + + def __init__(self, message_id, timestamp, type: str): + self.message_id = message_id + self.timestamp = timestamp + self.type = type + + +class Redux(Message): + __id__ = 44 + + def __init__(self, action: str, state: str, duration): + self.action = action + self.state = state + self.duration = duration + + +class Vuex(Message): + __id__ = 45 + + def __init__(self, mutation: str, state: str): + self.mutation = mutation + self.state = state + + +class MobX(Message): + __id__ = 46 + + def __init__(self, type: str, payload: str): + self.type = type + self.payload = payload + + +class NgRx(Message): + __id__ = 47 + + def __init__(self, action: str, state: str, duration): + self.action = action + self.state = state + self.duration = duration + + +class GraphQL(Message): + __id__ = 48 + + def __init__(self, operation_kind: str, operation_name: str, + variables: str, response: str): + self.operation_kind = operation_kind + self.operation_name = operation_name + self.variables = variables + self.response = response + + +class PerformanceTrack(Message): + __id__ = 49 + + def __init__(self, frames: int, ticks: int, + total_js_heap_size, used_js_heap_size): + self.frames = frames + self.ticks = ticks + self.total_js_heap_size = total_js_heap_size + self.used_js_heap_size = used_js_heap_size + + +class GraphQLEvent(Message): + __id__ = 50 + + def __init__(self, message_id, timestamp, name: str): + self.message_id = message_id + self.timestamp = timestamp + self.name = name + + +class FetchEvent(Message): + __id__ = 51 + + def __init__(self, message_id, timestamp, method: str, url, request, response: str, + status, duration): + self.message_id = message_id + self.timestamp = timestamp + self.method = method + self.url = url + self.request = request + self.response = response + self.status = status + self.duration = duration + + +class DomDrop(Message): + __id__ = 52 + + def __init__(self, timestamp): + self.timestamp = timestamp + + +class ResourceTiming(Message): + __id__ = 53 + + def __init__(self, timestamp, duration, ttfb, header_size, encoded_body_size, + decoded_body_size, url, initiator): + self.timestamp = timestamp + self.duration = duration + self.ttfb = ttfb + self.header_size = header_size + self.encoded_body_size = encoded_body_size + self.decoded_body_size = decoded_body_size + self.url = url + self.initiator = initiator + + +class ConnectionInformation(Message): + __id__ = 54 + + def __init__(self, downlink, type: str): + self.downlink = downlink + self.type = type + + +class SetPageVisibility(Message): + __id__ = 55 + + def __init__(self, hidden: bool): + self.hidden = hidden + + +class PerformanceTrackAggr(Message): + __id__ = 56 + + def __init__(self, timestamp_start, timestamp_end, min_fps, avg_fps, + max_fps, min_cpu, avg_cpu, max_cpu, + min_total_js_heap_size, avg_total_js_heap_size, + max_total_js_heap_size, min_used_js_heap_size, + avg_used_js_heap_size, max_used_js_heap_size + ): + self.timestamp_start = timestamp_start + self.timestamp_end = timestamp_end + self.min_fps = min_fps + self.avg_fps = avg_fps + self.max_fps = max_fps + self.min_cpu = min_cpu + self.avg_cpu = avg_cpu + self.max_cpu = max_cpu + self.min_total_js_heap_size = min_total_js_heap_size + self.avg_total_js_heap_size = avg_total_js_heap_size + self.max_total_js_heap_size = max_total_js_heap_size + self.min_used_js_heap_size = min_used_js_heap_size + self.avg_used_js_heap_size = avg_used_js_heap_size + self.max_used_js_heap_size = max_used_js_heap_size + + +class LongTask(Message): + __id__ = 59 + + def __init__(self, timestamp, duration, context, container_type, container_src: str, + container_id: str, container_name: str): + self.timestamp = timestamp + self.duration = duration + self.context = context + self.container_type = container_type + self.container_src = container_src + self.container_id = container_id + self.container_name = container_name + + +class SetNodeURLBasedAttribute(Message): + __id__ = 60 + + def __init__(self, id, name: str, value: str, base_url: str): + self.id = id + self.name = name + self.value = value + self.base_url = base_url + + +class SetStyleData(Message): + __id__ = 61 + + def __init__(self, id, data: str, base_url: str): + self.id = id + self.data = data + self.base_url = base_url + + +class IssueEvent(Message): + __id__ = 62 + + def __init__(self, message_id, timestamp, type: str, context_string: str, + context: str, payload: str): + self.message_id = message_id + self.timestamp = timestamp + self.type = type + self.context_string = context_string + self.context = context + self.payload = payload + + +class TechnicalInfo(Message): + __id__ = 63 + + def __init__(self, type: str, value: str): + self.type = type + self.value = value + + +class CustomIssue(Message): + __id__ = 64 + + def __init__(self, name: str, payload: str): + self.name = name + self.payload = payload + + +class PageClose(Message): + __id__ = 65 + + +class AssetCache(Message): + __id__ = 66 + + def __init__(self, url): + self.url = url + + +class CSSInsertRuleURLBased(Message): + __id__ = 67 + + def __init__(self, id, rule, index, base_url): + self.id = id + self.rule = rule + self.index = index + self.base_url = base_url + + +class MouseClick(Message): + __id__ = 69 + + def __init__(self, id, hesitation_time, label: str, selector): + self.id = id + self.hesitation_time = hesitation_time + self.label = label + self.selector = selector + + +class CreateIFrameDocument(Message): + __id__ = 70 + + def __init__(self, frame_id, id): + self.frame_id = frame_id + self.id = id + + +class BatchMeta(Message): + __id__ = 80 + + def __init__(self, page_no, first_index, timestamp): + self.page_no = page_no + self.first_index = first_index + self.timestamp = timestamp + +class IOSSessionStart(Message): + __id__ = 90 + + def __init__(self, timestamp, project_id, tracker_version: str, + rev_id: str, user_uuid: str, user_os: str, user_os_version: str, + user_device: str, user_device_type: str, user_country: str): + self.timestamp = timestamp + self.project_id = project_id + self.tracker_version = tracker_version + self.rev_id = rev_id + self.user_uuid = user_uuid + self.user_os = user_os + self.user_os_version = user_os_version + self.user_device = user_device + self.user_device_type = user_device_type + self.user_country = user_country + + +class IOSSessionEnd(Message): + __id__ = 91 + + def __init__(self, timestamp): + self.timestamp = timestamp + + +class IOSMetadata(Message): + __id__ = 92 + + def __init__(self, timestamp, length, key: str, value: str): + self.timestamp = timestamp + self.length = length + self.key = key + self.value = value + + +class IOSCustomEvent(Message): + __id__ = 93 + + def __init__(self, timestamp, length, name: str, payload: str): + self.timestamp = timestamp + self.length = length + self.name = name + self.payload = payload + + +class IOSUserID(Message): + __id__ = 94 + + def __init__(self, timestamp, length, value: str): + self.timestamp = timestamp + self.length = length + self.value = value + + +class IOSUserAnonymousID(Message): + __id__ = 95 + + def __init__(self, timestamp, length, value: str): + self.timestamp = timestamp + self.length = length + self.value = value + + +class IOSScreenChanges(Message): + __id__ = 96 + + def __init__(self, timestamp, length, x, y, width, height): + self.timestamp = timestamp + self.length = length + self.x = x + self.y = y + self.width = width + self.height = height + + +class IOSCrash(Message): + __id__ = 97 + + def __init__(self, timestamp, length, name: str, reason: str, stacktrace): + self.timestamp = timestamp + self.length = length + self.name = name + self.reason = reason + self.stacktrace = stacktrace + + +class IOSScreenEnter(Message): + __id__ = 98 + + def __init__(self, timestamp, length, title, view_name): + self.timestamp = timestamp + self.length = length + self.title = title + self.view_name = view_name + + +class IOSScreenLeave(Message): + __id__ = 99 + + def __init__(self, timestamp, length, title: str, view_name: str): + self.timestamp = timestamp + self.length = length + self.title = title + self.view_name = view_name + + +class IOSClickEvent(Message): + __id__ = 100 + + def __init__(self, timestamp, length, label, x, y): + self.timestamp = timestamp + self.length = length + self.label = label + self.x = x + self.y = y + + +class IOSInputEvent(Message): + __id__ = 101 + + def __init__(self, timestamp, length, value: str, value_masked: bool, label: str): + self.timestamp = timestamp + self.length = length + self.value_masked = value_masked + self.label = label + + +class IOSPerformanceEvent(Message): + __id__ = 102 + + def __init__(self, timestamp, length, name: str, value): + self.timestamp = timestamp + self.length = length + self.name = name + self.value = value + + +class IOSLog(Message): + __id__ = 103 + + def __init__(self, timestamp, length, severity: str, content: str): + self.timestamp = timestamp + self.length = length + self.severity = severity + self.content = content + + +class IOSInternalError(Message): + __id__ = 104 + + def __init__(self, timestamp, length, content: str): + self.timestamp = timestamp + self.length = length + self.content = content + + +class IOSNetworkCall(Message): + __id__ = 105 + + def __init__(self, timestamp, length, duration, headers, body, url, success: bool, method: str, status): + self.timestamp = timestamp + self.length = length + self.duration = duration + self.headers = headers + self.body = body + self.url = url + self.success = success + self.method = method + self.status = status + + +class IOSBatchMeta(Message): + __id__ = 107 + + def __init__(self, timestamp, length, first_index): + self.timestamp = timestamp + self.length = length + self.first_index = first_index + + +class IOSPerformanceAggregated(Message): + __id__ = 110 + + def __init__(self, timestamp_start, timestamp_end, min_fps, avg_fps, + max_fps, min_cpu, avg_cpu, max_cpu, + min_memory, avg_memory, max_memory, + min_battery, avg_battery, max_battery + ): + self.timestamp_start = timestamp_start + self.timestamp_end = timestamp_end + self.min_fps = min_fps + self.avg_fps = avg_fps + self.max_fps = max_fps + self.min_cpu = min_cpu + self.avg_cpu = avg_cpu + self.max_cpu = max_cpu + self.min_memory = min_memory + self.avg_memory = avg_memory + self.max_memory = max_memory + self.min_battery = min_battery + self.avg_battery = avg_battery + self.max_battery = max_battery + + +class IOSIssueEvent(Message): + __id__ = 111 + + def __init__(self, timestamp, type: str, context_string: str, context: str, payload: str): + self.timestamp = timestamp + self.type = type + self.context_string = context_string + self.context = context + self.payload = payload diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..d328a9142 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +kafka-python diff --git a/run_quickwit.sh b/run_quickwit.sh new file mode 100644 index 000000000..a00b1046d --- /dev/null +++ b/run_quickwit.sh @@ -0,0 +1 @@ +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/qwdata:/quickwit/qwdata -v $(pwd)/s3-config-listen.yaml:/quickwit/s3-config-listen.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 -p 127.0.0.1:7280:7280 quickwit/quickwit run --config s3-config-listen.yaml diff --git a/s3-config-listen.yaml b/s3-config-listen.yaml new file mode 100644 index 000000000..524339f2f --- /dev/null +++ b/s3-config-listen.yaml @@ -0,0 +1,5 @@ +## In order to save data into S3 +version: 0 +metastore_uri: s3://quickwit/quickwit-indexes +default_index_root_uri: s3://quickwit/quickwit-indexes +listen_address: 0.0.0.0 diff --git a/s3-config.yaml b/s3-config.yaml new file mode 100644 index 000000000..5cf727b28 --- /dev/null +++ b/s3-config.yaml @@ -0,0 +1,4 @@ +## In order to save data into S3 +version: 0 +metastore_uri: s3://quickwit/quickwit-indexes +default_index_root_uri: s3://quickwit/quickwit-indexes From 42a9d80a0de90bf0617b79ca017106d1ade6677e Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 27 Jul 2022 13:59:44 +0200 Subject: [PATCH 14/33] Fixed clean error --- clean.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clean.sh b/clean.sh index 81469034c..6101f890b 100644 --- a/clean.sh +++ b/clean.sh @@ -1,2 +1,2 @@ -docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/qwdata:/quickwit/qwdata -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml quickwit/quickwit source delete --index quickwit-kafka --source kafka-source --config s3-config.yaml -docker run -v /etc/hosts:/etc/hosts:ro -v -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml $(pwd)/qwdata:/quickwit/qwdata quickwit/quickwit index delete --index quickwit-kafka --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/qwdata:/quickwit/qwdata -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit source delete --index quickwit-kafka --source kafka-source --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -v $(pwd)/qwdata:/quickwit/qwdata -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit index delete --index quickwit-kafka --config s3-config.yaml From c1620db523cc1d2d1c283eb5c59645bdad17209c Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 27 Jul 2022 14:06:23 +0200 Subject: [PATCH 15/33] updated bash files: deleted unnecessary commands --- clean.sh | 4 ++-- create_kafka_index.sh | 2 +- create_source.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clean.sh b/clean.sh index 6101f890b..9dbf56926 100644 --- a/clean.sh +++ b/clean.sh @@ -1,2 +1,2 @@ -docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/qwdata:/quickwit/qwdata -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit source delete --index quickwit-kafka --source kafka-source --config s3-config.yaml -docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -v $(pwd)/qwdata:/quickwit/qwdata -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit index delete --index quickwit-kafka --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit source delete --index quickwit-kafka --source kafka-source --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit index delete --index quickwit-kafka --config s3-config.yaml diff --git a/create_kafka_index.sh b/create_kafka_index.sh index 651b3983c..e2669c6ca 100644 --- a/create_kafka_index.sh +++ b/create_kafka_index.sh @@ -1 +1 @@ -docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/qwdata:/quickwit/qwdata -v $(pwd)/index-config.yaml:/quickwit/index-config.yaml -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit index create --index-config index-config.yaml --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/index-config.yaml:/quickwit/index-config.yaml -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit index create --index-config index-config.yaml --config s3-config.yaml diff --git a/create_source.sh b/create_source.sh index b1c37c020..767c60342 100644 --- a/create_source.sh +++ b/create_source.sh @@ -1 +1 @@ -docker run -v /etc/hosts:/etc/hosts:ro -v /etc/hosts:/etc/hosts:ro -v $(pwd)/qwdata:/quickwit/qwdata -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -v $(pwd)/kafka-source.yaml:/quickwit/kafka-source.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit source create --index quickwit-kafka --source-config kafka-source.yaml --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -v $(pwd)/kafka-source.yaml:/quickwit/kafka-source.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit source create --index quickwit-kafka --source-config kafka-source.yaml --config s3-config.yaml From e9961b16037039a1dd5a59b8b722dba24e3da707 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 27 Jul 2022 14:19:06 +0200 Subject: [PATCH 16/33] Added some comments and notes --- kafka-source.yaml | 1 + s3-config-listen.yaml | 1 + s3-config.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/kafka-source.yaml b/kafka-source.yaml index 89b6174cf..12f667dee 100644 --- a/kafka-source.yaml +++ b/kafka-source.yaml @@ -7,3 +7,4 @@ params: topic: quickwit-kafka client_params: bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + # security.protocol: SSL diff --git a/s3-config-listen.yaml b/s3-config-listen.yaml index 524339f2f..f6065e927 100644 --- a/s3-config-listen.yaml +++ b/s3-config-listen.yaml @@ -1,4 +1,5 @@ ## In order to save data into S3 +# metastore also accepts s3://{bucket/path}#pooling_interval={seconds}s version: 0 metastore_uri: s3://quickwit/quickwit-indexes default_index_root_uri: s3://quickwit/quickwit-indexes diff --git a/s3-config.yaml b/s3-config.yaml index 5cf727b28..2fa1e20d7 100644 --- a/s3-config.yaml +++ b/s3-config.yaml @@ -1,4 +1,5 @@ ## In order to save data into S3 +# metastore also accepts s3://{bucket/path}#pooling_interval={seconds}s version: 0 metastore_uri: s3://quickwit/quickwit-indexes default_index_root_uri: s3://quickwit/quickwit-indexes From ea21040ccc364e66879bb86b540edf4ab677e083 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 27 Jul 2022 15:56:33 +0200 Subject: [PATCH 17/33] aws_region set as environment variable --- clean.sh | 4 ++-- create_kafka_index.sh | 2 +- create_source.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clean.sh b/clean.sh index 9dbf56926..502db2a2e 100644 --- a/clean.sh +++ b/clean.sh @@ -1,2 +1,2 @@ -docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit source delete --index quickwit-kafka --source kafka-source --config s3-config.yaml -docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit index delete --index quickwit-kafka --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit source delete --index quickwit-kafka --source kafka-source --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit index delete --index quickwit-kafka --config s3-config.yaml diff --git a/create_kafka_index.sh b/create_kafka_index.sh index e2669c6ca..24e76803a 100644 --- a/create_kafka_index.sh +++ b/create_kafka_index.sh @@ -1 +1 @@ -docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/index-config.yaml:/quickwit/index-config.yaml -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit index create --index-config index-config.yaml --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/index-config.yaml:/quickwit/index-config.yaml -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit index create --index-config index-config.yaml --config s3-config.yaml diff --git a/create_source.sh b/create_source.sh index 767c60342..77aab1937 100644 --- a/create_source.sh +++ b/create_source.sh @@ -1 +1 @@ -docker run -v /etc/hosts:/etc/hosts:ro -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -v $(pwd)/kafka-source.yaml:/quickwit/kafka-source.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 quickwit/quickwit source create --index quickwit-kafka --source-config kafka-source.yaml --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -v $(pwd)/kafka-source.yaml:/quickwit/kafka-source.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit source create --index quickwit-kafka --source-config kafka-source.yaml --config s3-config.yaml From df468f38e5f520dd4bdbc2ff99d31bc00d880647 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 27 Jul 2022 16:58:56 +0200 Subject: [PATCH 18/33] Added README.md --- README.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 000000000..7b566ed13 --- /dev/null +++ b/README.md @@ -0,0 +1,39 @@ +# Quickwit for kafka messages (S3 storage) + +## index +1. [Setup](#setup) +2. [Deploy](#deploy) + +## Setup +This setup is made using Docker, make changes to the files accordingly to run it locally. + +In order to connect to AWS S3 service the aws credentials must be defined in the environment +```bash +export aws_access_key_id={your_aws_access_key_id} +export aws_secret_access_key={your_aws_secret_access_key} +export aws_region={bucket_region} +``` +In the file kafka-source.yaml replace the bootstap.server with the address of your kafka service and uncomment to activate ssl protocol if needed. +## Deploy + +To create the index 'quickwit-kafka' run the command: +```bash +bash create_kafka_index.sh +``` +Having the topic 'quickwit-kafka' in the kafka server defined in the kafka-source.yaml, the connection between the created index and the topic can be achieved by running the command: +```bash +bash create_source.sh +``` +To delete both the index and the source connection run the command: +```bash +bash clean.sh +``` + +To deploy the indexer, search and UI services run the command: +```bash +bash run_quickwit.sh +``` +UI server will start at localhost:7280. The api can also be called through the url http://127.0.0.1:7280/api/v1/quickwit-kafka/search?query={your_query} for example +```angular2html +curl "http://127.0.0.1:7280/api/v1/quickwit-kafka/search?query=body:error" +``` \ No newline at end of file From 7b77a0e90e7e4743ada5437887fa7d5068b75dc6 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 27 Jul 2022 17:57:25 +0200 Subject: [PATCH 19/33] Updated JSON keys and index conf --- index-config.yaml | 26 +++++++++++++++++++++++--- kafka_sample.py | 34 +++++++++------------------------- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/index-config.yaml b/index-config.yaml index 43f8d3324..8e68eb73d 100644 --- a/index-config.yaml +++ b/index-config.yaml @@ -8,14 +8,34 @@ index_id: quickwit-kafka doc_mapping: field_mappings: - - name: title + - name: method type: text tokenizer: default record: position - - name: body + - name: url + type: text + tokenizer: default + record: position + - name: request + type: text + tokenizer: default + record: position + - name: response + type: text + tokenizer: default + record: position + - name: status + type: text + tokenizer: default + record: position + - name: timestamp + type: text + tokenizer: default + record: position + - name: duration type: text tokenizer: default record: position search_settings: - default_search_fields: [title, body] + default_search_fields: [url, request, response] diff --git a/kafka_sample.py b/kafka_sample.py index 9d58ae54d..0ebce304c 100644 --- a/kafka_sample.py +++ b/kafka_sample.py @@ -11,11 +11,14 @@ import json import getopt, sys n = 0 - +fetch_keys = ['method', 'url', 'request', 'response', 'status', 'timestamp', 'duration'] def transform(data): global n n += 1 - return {'title': f'message {n}', 'body': data} + return { + 'method': data.method, 'url': data.url, 'request': data.url, 'response': data.request, + 'status': data.status, 'timestamp': data.timestamp, 'duration': data.duration + } def create_producer(): producer = KafkaProducer(#security_protocol="SSL", @@ -32,7 +35,7 @@ def create_consumer(): consumer = KafkaConsumer(#security_protocol="SSL", bootstrap_servers=os.environ['KAFKA_SERVER_2'], # os.environ['KAFKA_SERVER_1']], - group_id=f"my_test52_connector", + group_id=f"quickwit_connector", auto_offset_reset="earliest", enable_auto_commit=False ) @@ -41,11 +44,6 @@ def create_consumer(): def consumer_producer_end(): global n - batch_size = 4000 - sessions_batch_size = 400 - batch = [] - sessions = defaultdict(lambda: None) - sessions_batch = [] codec = MessageCodec() consumer = create_consumer() @@ -60,27 +58,13 @@ def consumer_producer_end(): if messages is None: print('-') for message in messages: - send = True - if isinstance(message, Fetch): - data = message.response - elif isinstance(message, FetchEvent): - data = message.response - elif isinstance(message, PageEvent): - print(message.url) - elif isinstance(message, SetCSSData): - data = message.data - elif isinstance(message, SetStyleData): - data = message.data - else: - send = False - continue - if send: - producer.send('quickwit-kafka', value=transform(data)) + send = False + if isinstance(message, Fetch) or isinstance(message, FetchEvent): + producer.send('quickwit-kafka', value=transform(message)) print(f'added message {n}') sleep(5) - def consumer_end(): consumer = create_consumer() consumer.subscribe(topics=['quickwit-kafka']) From c38255308a7564f6ab2f796a797c24b693592d76 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Fri, 29 Jul 2022 12:13:47 +0200 Subject: [PATCH 20/33] Added new Indexes, updated source and python test of quickwit --- README.md | 2 +- index-config.yaml => index-config-fetch.yaml | 17 +++-- index-config-graphql.yaml | 30 +++++++++ index-config-pageevent.yaml | 68 ++++++++++++++++++++ kafka_sample.py | 39 +++++++++-- sources.yaml | 29 +++++++++ 6 files changed, 170 insertions(+), 15 deletions(-) rename index-config.yaml => index-config-fetch.yaml (74%) create mode 100644 index-config-graphql.yaml create mode 100644 index-config-pageevent.yaml create mode 100644 sources.yaml diff --git a/README.md b/README.md index 7b566ed13..f5110c7ee 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,6 @@ To deploy the indexer, search and UI services run the command: bash run_quickwit.sh ``` UI server will start at localhost:7280. The api can also be called through the url http://127.0.0.1:7280/api/v1/quickwit-kafka/search?query={your_query} for example -```angular2html +```bash curl "http://127.0.0.1:7280/api/v1/quickwit-kafka/search?query=body:error" ``` \ No newline at end of file diff --git a/index-config.yaml b/index-config-fetch.yaml similarity index 74% rename from index-config.yaml rename to index-config-fetch.yaml index 8e68eb73d..1a7577450 100644 --- a/index-config.yaml +++ b/index-config-fetch.yaml @@ -4,9 +4,10 @@ version: 0 -index_id: quickwit-kafka +index_id: fetch doc_mapping: + mode: strict field_mappings: - name: method type: text @@ -25,17 +26,15 @@ doc_mapping: tokenizer: default record: position - name: status - type: text - tokenizer: default + type: i64 + fast: true record: position - name: timestamp - type: text - tokenizer: default - record: position + type: i64 + fast: true - name: duration - type: text - tokenizer: default - record: position + type: i64 + fast: true search_settings: default_search_fields: [url, request, response] diff --git a/index-config-graphql.yaml b/index-config-graphql.yaml new file mode 100644 index 000000000..bac1d8406 --- /dev/null +++ b/index-config-graphql.yaml @@ -0,0 +1,30 @@ +# +# Index config file for gh-archive dataset. +# + +version: 0 + +index_id: graphql + +doc_mapping: + mode: strict + field_mappings: + - name: operation_kind + type: text + tokenizer: default + record: position + - name: operation_name + type: text + tokenizer: default + record: position + - name: variables + type: text + tokenizer: default + record: position + - name: response + type: text + tokenizer: default + record: position + +search_settings: + default_search_fields: [operation_kind, operation_name, variables] diff --git a/index-config-pageevent.yaml b/index-config-pageevent.yaml new file mode 100644 index 000000000..90d8d152f --- /dev/null +++ b/index-config-pageevent.yaml @@ -0,0 +1,68 @@ +# +# Index config file for gh-archive dataset. +# + +version: 0 + +index_id: pageevent + +doc_mapping: + mode: strict + field_mappings: + - name: message_id + type: i64 + fast: true + record: position + - name: timestamp + type: i64 + fast: true + - name: url + type: text + tokenizer: default + record: position + - name: referrer + type: text + tokenizer: default + record: position + - name: loaded + type: i64 + fast: true + - name: request_start + type: i64 + fast: true + - name: response_start + type: i64 + fast: true + - name: response_end + type: i64 + fast: true + - name: dom_content_loaded_event_start + type: i64 + fast: true + - name: dom_content_loaded_event_end + type: i64 + fast: true + - name: load_event_start + type: i64 + fast: true + - name: load_event_end + type: i64 + fast: true + - name: first_paint + type: i64 + fast: true + - name: first_contentful_paint + type: i64 + fast: true + - name: speed_index + type: i64 + fast: true + - name: visually_complete + type: i64 + fast: true + - name: time_to_interactive + type: i64 + fast: true + +search_settings: + default_search_fields: [url, referrer, visually_complete] diff --git a/kafka_sample.py b/kafka_sample.py index 0ebce304c..47e9f8001 100644 --- a/kafka_sample.py +++ b/kafka_sample.py @@ -5,14 +5,13 @@ from datetime import datetime from collections import defaultdict from msgcodec.codec import MessageCodec -from msgcodec.messages import SessionEnd, Fetch, FetchEvent, PageEvent, SetCSSData, SetStyleData +from msgcodec.messages import Fetch, FetchEvent, PageEvent, GraphQL import json import getopt, sys n = 0 -fetch_keys = ['method', 'url', 'request', 'response', 'status', 'timestamp', 'duration'] -def transform(data): +def transform_fetch(data): global n n += 1 return { @@ -20,6 +19,28 @@ def transform(data): 'status': data.status, 'timestamp': data.timestamp, 'duration': data.duration } +def transform_graphql(data): + global n + n += 1 + return { + 'operation_kind': data.operation_kind, 'operation_name': data.operation_name, + 'variables': data.variables, 'response': data.response + } + +def transform_pageevent(data): + global n + n += 1 + return {'massage_id': data.message_id, 'timestamp': data.timestamp, 'url': data.timestamp, + 'referrer': data.referrer, 'loaded': data.loaded, 'request_start': data.request_start, + 'response_start': data.response_start, 'response_end': data.response_end, + 'dom_content_loaded_event_start': data.dom_content_loaded_event_start, + 'dom_content_loaded_event_end': data.dom_content_loaded_event_end, + 'load_event_start': data.load_event_start, 'load_event_end': data.load_event_end, + 'first_paint': data.first_paint, 'first_contentful_paint': data.first_contentful_paint, + 'speed_index': data.speed_index, 'visually_complete': data.visually_complete, + 'time_to_interactive': data.time_to_interactive + } + def create_producer(): producer = KafkaProducer(#security_protocol="SSL", bootstrap_servers=os.environ['KAFKA_SERVER_2'], @@ -60,8 +81,16 @@ def consumer_producer_end(): for message in messages: send = False if isinstance(message, Fetch) or isinstance(message, FetchEvent): - producer.send('quickwit-kafka', value=transform(message)) - print(f'added message {n}') + producer.send('quickwit-kafka', value=transform_fetch(message)) + print(f'added message {n} type Fetch') + sleep(5) + if isinstance(message, GraphQL): + producer.send('quickwit-kafka', value=transform_graphql(message)) + print(f'added message {n} type GraphQL') + sleep(5) + if isinstance(message, PageEvent): + producer.send('quickwit-kafka', value=transform_pageevent(message)) + print(f'added message {n} type PageEvent') sleep(5) diff --git a/sources.yaml b/sources.yaml new file mode 100644 index 000000000..7dad76619 --- /dev/null +++ b/sources.yaml @@ -0,0 +1,29 @@ +# +# Source config file. +# + +sources: + - fetch: fetch-kafka + source_type: kafka + params: + topic: quickwit-kafka + client_params: + bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + group.id: fetch-consumer + # security.protocol: SSL + - graphql: graphql-kafka + source_type: kafka + params: + topic: quickwit-kafka + client_params: + bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + group.id: graphql-consumer + # security.protocol: SSL + - graphql: graphql-pageevent + source_type: kafka + params: + topic: quickwit-kafka + client_params: + bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + group.id: pageevent-consumer + # security.protocol: SSL From 125e5289be880b5d64f1b2668bb1d93fe0f98c24 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Fri, 29 Jul 2022 16:46:49 +0200 Subject: [PATCH 21/33] Updated methods. New indexes created in quickwit reading from same topic in kafka with different group id --- clean.sh | 10 +++++++-- create_kafka_index.sh | 4 +++- create_source.sh | 1 - create_sources.sh | 3 +++ index-config-fetch.yaml | 4 ++-- index-config-pageevent.yaml | 2 +- run_quickwit.sh | 2 +- kafka-source.yaml => source-fetch.yaml | 6 ++++-- source-graphql.yaml | 12 +++++++++++ source-pageevent.yaml | 12 +++++++++++ sources.yaml | 29 -------------------------- 11 files changed, 46 insertions(+), 39 deletions(-) delete mode 100644 create_source.sh create mode 100644 create_sources.sh rename kafka-source.yaml => source-fetch.yaml (69%) create mode 100644 source-graphql.yaml create mode 100644 source-pageevent.yaml delete mode 100644 sources.yaml diff --git a/clean.sh b/clean.sh index 502db2a2e..9ec3a5f7e 100644 --- a/clean.sh +++ b/clean.sh @@ -1,2 +1,8 @@ -docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit source delete --index quickwit-kafka --source kafka-source --config s3-config.yaml -docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit index delete --index quickwit-kafka --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit source delete --index fetchevent --source fetch-kafka --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit index delete --index fetchevent --config s3-config.yaml + +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit source delete --index graphql --source graphql-kafka --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit index delete --index graphql --config s3-config.yaml + +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit source delete --index pageevent --source pageevent-kafka --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit index delete --index pageevent --config s3-config.yaml diff --git a/create_kafka_index.sh b/create_kafka_index.sh index 24e76803a..efb165db6 100644 --- a/create_kafka_index.sh +++ b/create_kafka_index.sh @@ -1 +1,3 @@ -docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/index-config.yaml:/quickwit/index-config.yaml -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit index create --index-config index-config.yaml --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/index-config-fetch.yaml:/quickwit/index-config-fetch.yaml -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit index create --index-config index-config-fetch.yaml --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/index-config-graphql.yaml:/quickwit/index-config-graphql.yaml -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit index create --index-config index-config-graphql.yaml --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/index-config-pageevent.yaml:/quickwit/index-config-pageevent.yaml -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit index create --index-config index-config-pageevent.yaml --config s3-config.yaml diff --git a/create_source.sh b/create_source.sh deleted file mode 100644 index 77aab1937..000000000 --- a/create_source.sh +++ /dev/null @@ -1 +0,0 @@ -docker run -v /etc/hosts:/etc/hosts:ro -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -v $(pwd)/kafka-source.yaml:/quickwit/kafka-source.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit source create --index quickwit-kafka --source-config kafka-source.yaml --config s3-config.yaml diff --git a/create_sources.sh b/create_sources.sh new file mode 100644 index 000000000..c854899c3 --- /dev/null +++ b/create_sources.sh @@ -0,0 +1,3 @@ +docker run -v /etc/hosts:/etc/hosts:ro -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -v $(pwd)/source-fetch.yaml:/quickwit/source-fetch.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit source create --index fetchevent --source-config source-fetch.yaml --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -v $(pwd)/source-graphql.yaml:/quickwit/source-graphql.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit source create --index graphql --source-config source-graphql.yaml --config s3-config.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config.yaml:/quickwit/s3-config.yaml -v $(pwd)/source-pageevent.yaml:/quickwit/source-pageevent.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region quickwit/quickwit source create --index pageevent --source-config source-pageevent.yaml --config s3-config.yaml diff --git a/index-config-fetch.yaml b/index-config-fetch.yaml index 1a7577450..1d89f72c9 100644 --- a/index-config-fetch.yaml +++ b/index-config-fetch.yaml @@ -4,7 +4,7 @@ version: 0 -index_id: fetch +index_id: fetchevent doc_mapping: mode: strict @@ -27,8 +27,8 @@ doc_mapping: record: position - name: status type: i64 + indexed: true fast: true - record: position - name: timestamp type: i64 fast: true diff --git a/index-config-pageevent.yaml b/index-config-pageevent.yaml index 90d8d152f..36a0a69fe 100644 --- a/index-config-pageevent.yaml +++ b/index-config-pageevent.yaml @@ -11,8 +11,8 @@ doc_mapping: field_mappings: - name: message_id type: i64 + indexed: true fast: true - record: position - name: timestamp type: i64 fast: true diff --git a/run_quickwit.sh b/run_quickwit.sh index a00b1046d..b2afeb011 100644 --- a/run_quickwit.sh +++ b/run_quickwit.sh @@ -1 +1 @@ -docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/qwdata:/quickwit/qwdata -v $(pwd)/s3-config-listen.yaml:/quickwit/s3-config-listen.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=eu-central-1 -e AWS_REGION=eu-central-1 -p 127.0.0.1:7280:7280 quickwit/quickwit run --config s3-config-listen.yaml +docker run -v /etc/hosts:/etc/hosts:ro -v $(pwd)/s3-config-listen.yaml:/quickwit/s3-config-listen.yaml -e AWS_ACCESS_KEY_ID=$aws_access_key_id -e AWS_SECRET_ACCESS_KEY=$aws_secret_access_key -e AWS_DEFAULT_REGION=$aws_region -e AWS_REGION=$aws_region -p 127.0.0.1:7280:7280 quickwit/quickwit run --config s3-config-listen.yaml diff --git a/kafka-source.yaml b/source-fetch.yaml similarity index 69% rename from kafka-source.yaml rename to source-fetch.yaml index 12f667dee..506c234f6 100644 --- a/kafka-source.yaml +++ b/source-fetch.yaml @@ -1,10 +1,12 @@ # -# Kafka source config file. +# Source config file. # -source_id: kafka-source + +source_id: fetch-kafka source_type: kafka params: topic: quickwit-kafka client_params: bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + group.id: fetch-consumer # security.protocol: SSL diff --git a/source-graphql.yaml b/source-graphql.yaml new file mode 100644 index 000000000..900e0f92b --- /dev/null +++ b/source-graphql.yaml @@ -0,0 +1,12 @@ +# +# Source config file. +# + +source_id: graphql-kafka +source_type: kafka +params: + topic: quickwit-kafka + client_params: + bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + group.id: graphql-consumer + # security.protocol: SSL diff --git a/source-pageevent.yaml b/source-pageevent.yaml new file mode 100644 index 000000000..6c5582a96 --- /dev/null +++ b/source-pageevent.yaml @@ -0,0 +1,12 @@ +# +# Source config file. +# + +source_id: pageevent-kafka +source_type: kafka +params: + topic: quickwit-kafka + client_params: + bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + group.id: pageevent-consumer + # security.protocol: SSL diff --git a/sources.yaml b/sources.yaml deleted file mode 100644 index 7dad76619..000000000 --- a/sources.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# -# Source config file. -# - -sources: - - fetch: fetch-kafka - source_type: kafka - params: - topic: quickwit-kafka - client_params: - bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 - group.id: fetch-consumer - # security.protocol: SSL - - graphql: graphql-kafka - source_type: kafka - params: - topic: quickwit-kafka - client_params: - bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 - group.id: graphql-consumer - # security.protocol: SSL - - graphql: graphql-pageevent - source_type: kafka - params: - topic: quickwit-kafka - client_params: - bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 - group.id: pageevent-consumer - # security.protocol: SSL From c198f30039d532619c05b1a60b7a3ecbc8325250 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Fri, 29 Jul 2022 16:47:29 +0200 Subject: [PATCH 22/33] quickwit test script updated --- kafka_sample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka_sample.py b/kafka_sample.py index 47e9f8001..cba7869ab 100644 --- a/kafka_sample.py +++ b/kafka_sample.py @@ -56,7 +56,7 @@ def create_consumer(): consumer = KafkaConsumer(#security_protocol="SSL", bootstrap_servers=os.environ['KAFKA_SERVER_2'], # os.environ['KAFKA_SERVER_1']], - group_id=f"quickwit_connector", + group_id=f"quickwit_connector2", auto_offset_reset="earliest", enable_auto_commit=False ) From 5f669bf275625073cbd1f038b0836c4603b660a0 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Fri, 29 Jul 2022 17:25:51 +0200 Subject: [PATCH 23/33] added gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..bf7d42865 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.idea +Pip* + From 2ad4f4ea124232274bc09ab1f1faf2581053e619 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Mon, 1 Aug 2022 10:48:14 +0200 Subject: [PATCH 24/33] changed topic name to quickwit, and kafka source uri for localhost --- source-fetch.yaml | 6 +++--- source-graphql.yaml | 6 +++--- source-pageevent.yaml | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/source-fetch.yaml b/source-fetch.yaml index 506c234f6..9ea448e83 100644 --- a/source-fetch.yaml +++ b/source-fetch.yaml @@ -5,8 +5,8 @@ source_id: fetch-kafka source_type: kafka params: - topic: quickwit-kafka + topic: quickwit client_params: - bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + bootstrap.servers: localhost:9092 group.id: fetch-consumer - # security.protocol: SSL + security.protocol: SSL diff --git a/source-graphql.yaml b/source-graphql.yaml index 900e0f92b..0f0d8fa02 100644 --- a/source-graphql.yaml +++ b/source-graphql.yaml @@ -5,8 +5,8 @@ source_id: graphql-kafka source_type: kafka params: - topic: quickwit-kafka + topic: quickwit client_params: - bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + bootstrap.servers: localhost:9092 group.id: graphql-consumer - # security.protocol: SSL + security.protocol: SSL diff --git a/source-pageevent.yaml b/source-pageevent.yaml index 6c5582a96..0eca56956 100644 --- a/source-pageevent.yaml +++ b/source-pageevent.yaml @@ -5,8 +5,8 @@ source_id: pageevent-kafka source_type: kafka params: - topic: quickwit-kafka + topic: quickwit client_params: - bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + bootstrap.servers: localhost:9092 group.id: pageevent-consumer - # security.protocol: SSL + security.protocol: SSL From 4ca07f44002d3ac3e56a37503f8a93d0ad092f45 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Mon, 1 Aug 2022 14:48:15 +0200 Subject: [PATCH 25/33] Changed type loaded from Fetch from i64 to bool --- ee/quickwit/index-config-pageevent.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/quickwit/index-config-pageevent.yaml b/ee/quickwit/index-config-pageevent.yaml index 36a0a69fe..e47dd6a1d 100644 --- a/ee/quickwit/index-config-pageevent.yaml +++ b/ee/quickwit/index-config-pageevent.yaml @@ -25,7 +25,7 @@ doc_mapping: tokenizer: default record: position - name: loaded - type: i64 + type: bool fast: true - name: request_start type: i64 From ab06c5c8e38135cb2d9b6bb428995107827871df Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Tue, 9 Aug 2022 10:40:14 +0200 Subject: [PATCH 26/33] added events detailed snowflakes --- .../sql/snowflake_events_detailed.sql | 238 ++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 ee/connectors/sql/snowflake_events_detailed.sql diff --git a/ee/connectors/sql/snowflake_events_detailed.sql b/ee/connectors/sql/snowflake_events_detailed.sql new file mode 100644 index 000000000..092a50ea3 --- /dev/null +++ b/ee/connectors/sql/snowflake_events_detailed.sql @@ -0,0 +1,238 @@ +CREATE TABLE IF NOT EXISTS connector_events_detailed +( + sessionid bigint, + clickevent_hesitationtime bigint, + clickevent_label text, + clickevent_messageid bigint, + clickevent_timestamp bigint, + connectioninformation_downlink bigint, + connectioninformation_type text, + consolelog_level text, + consolelog_value text, + cpuissue_duration bigint, + cpuissue_rate bigint, + cpuissue_timestamp bigint, + createdocument boolean, + createelementnode_id bigint, + createelementnode_parentid bigint, + cssdeleterule_index bigint, + cssdeleterule_stylesheetid bigint, + cssinsertrule_index bigint, + cssinsertrule_rule text, + cssinsertrule_stylesheetid bigint, + customevent_messageid bigint, + customevent_name text, + customevent_payload text, + customevent_timestamp bigint, + domdrop_timestamp bigint, + errorevent_message text, + errorevent_messageid bigint, + errorevent_name text, + errorevent_payload text, + errorevent_source text, + errorevent_timestamp bigint, + fetch_duration bigint, + fetch_method text, + fetch_request text, + fetch_response text, + fetch_status bigint, + fetch_timestamp bigint, + fetch_url text, + graphql_operationkind text, + graphql_operationname text, + graphql_response text, + graphql_variables text, + graphqlevent_messageid bigint, + graphqlevent_name text, + graphqlevent_timestamp bigint, + inputevent_label text, + inputevent_messageid bigint, + inputevent_timestamp bigint, + inputevent_value text, + inputevent_valuemasked boolean, + jsexception_message text, + jsexception_name text, + jsexception_payload text, + memoryissue_duration bigint, + memoryissue_rate bigint, + memoryissue_timestamp bigint, + metadata_key text, + metadata_value text, + mobx_payload text, + mobx_type text, + mouseclick_id bigint, + mouseclick_hesitationtime bigint, + mouseclick_label text, + mousemove_x bigint, + mousemove_y bigint, + movenode_id bigint, + movenode_index bigint, + movenode_parentid bigint, + ngrx_action text, + ngrx_duration bigint, + ngrx_state text, + otable_key text, + otable_value text, + pageevent_domcontentloadedeventend bigint, + pageevent_domcontentloadedeventstart bigint, + pageevent_firstcontentfulpaint bigint, + pageevent_firstpaint bigint, + pageevent_loaded boolean, + pageevent_loadeventend bigint, + pageevent_loadeventstart bigint, + pageevent_messageid bigint, + pageevent_referrer text, + pageevent_requeststart bigint, + pageevent_responseend bigint, + pageevent_responsestart bigint, + pageevent_speedindex bigint, + pageevent_timestamp bigint, + pageevent_url text, + pageloadtiming_domcontentloadedeventend bigint, + pageloadtiming_domcontentloadedeventstart bigint, + pageloadtiming_firstcontentfulpaint bigint, + pageloadtiming_firstpaint bigint, + pageloadtiming_loadeventend bigint, + pageloadtiming_loadeventstart bigint, + pageloadtiming_requeststart bigint, + pageloadtiming_responseend bigint, + pageloadtiming_responsestart bigint, + pagerendertiming_speedindex bigint, + pagerendertiming_timetointeractive bigint, + pagerendertiming_visuallycomplete bigint, + performancetrack_frames bigint, + performancetrack_ticks bigint, + performancetrack_totaljsheapsize bigint, + performancetrack_usedjsheapsize bigint, + performancetrackaggr_avgcpu bigint, + performancetrackaggr_avgfps bigint, + performancetrackaggr_avgtotaljsheapsize bigint, + performancetrackaggr_avgusedjsheapsize bigint, + performancetrackaggr_maxcpu bigint, + performancetrackaggr_maxfps bigint, + performancetrackaggr_maxtotaljsheapsize bigint, + performancetrackaggr_maxusedjsheapsize bigint, + performancetrackaggr_mincpu bigint, + performancetrackaggr_minfps bigint, + performancetrackaggr_mintotaljsheapsize bigint, + performancetrackaggr_minusedjsheapsize bigint, + performancetrackaggr_timestampend bigint, + performancetrackaggr_timestampstart bigint, + profiler_args text, + profiler_duration bigint, + profiler_name text, + profiler_result text, + rawcustomevent_name text, + rawcustomevent_payload text, + rawerrorevent_message text, + rawerrorevent_name text, + rawerrorevent_payload text, + rawerrorevent_source text, + rawerrorevent_timestamp bigint, + redux_action text, + redux_duration bigint, + redux_state text, + removenode_id bigint, + removenodeattribute_id bigint, + removenodeattribute_name text, + resourceevent_decodedbodysize bigint, + resourceevent_duration bigint, + resourceevent_encodedbodysize bigint, + resourceevent_headersize bigint, + resourceevent_messageid bigint, + resourceevent_method text, + resourceevent_status bigint, + resourceevent_success boolean, + resourceevent_timestamp bigint, + resourceevent_ttfb bigint, + resourceevent_type text, + resourceevent_url text, + resourcetiming_decodedbodysize bigint, + resourcetiming_duration bigint, + resourcetiming_encodedbodysize bigint, + resourcetiming_headersize bigint, + resourcetiming_initiator text, + resourcetiming_timestamp bigint, + resourcetiming_ttfb bigint, + resourcetiming_url text, + sessiondisconnect boolean, + sessiondisconnect_timestamp bigint, + sessionend boolean, + sessionend_timestamp bigint, + sessionstart_projectid bigint, + sessionstart_revid text, + sessionstart_timestamp bigint, + sessionstart_trackerversion text, + sessionstart_useragent text, + sessionstart_userbrowser text, + sessionstart_userbrowserversion text, + sessionstart_usercountry text, + sessionstart_userdevice text, + sessionstart_userdeviceheapsize bigint, + sessionstart_userdevicememorysize bigint, + sessionstart_userdevicetype text, + sessionstart_useros text, + sessionstart_userosversion text, + sessionstart_useruuid text, + setcssdata_data bigint, + setcssdata_id bigint, + setinputchecked_checked bigint, + setinputchecked_id bigint, + setinputtarget_id bigint, + setinputtarget_label bigint, + setinputvalue_id bigint, + setinputvalue_mask bigint, + setinputvalue_value bigint, + setnodeattribute_id bigint, + setnodeattribute_name bigint, + setnodeattribute_value bigint, + setnodedata_data bigint, + setnodedata_id bigint, + setnodescroll_id bigint, + setnodescroll_x bigint, + setnodescroll_y bigint, + setpagelocation_navigationstart bigint, + setpagelocation_referrer text, + setpagelocation_url text, + setpagevisibility_hidden boolean, + setviewportscroll_x bigint, + setviewportscroll_y bigint, + setviewportsize_height bigint, + setviewportsize_width bigint, + stateaction_type text, + stateactionevent_messageid bigint, + stateactionevent_timestamp bigint, + stateactionevent_type text, + timestamp_timestamp bigint, + useranonymousid_id text, + userid_id text, + vuex_mutation text, + vuex_state text, + longtasks_timestamp bigint, + longtasks_duration bigint, + longtasks_context bigint, + longtasks_containertype bigint, + longtasks_containersrc text, + longtasks_containerid text, + longtasks_containername bigint, + setnodeurlbasedattribute_id bigint, + setnodeurlbasedattribute_name text, + setnodeurlbasedattribute_value text, + setnodeurlbasedattribute_baseurl text, + setstyledata_id bigint, + setstyledata_data text, + setstyledata_baseurl text, + issueevent_messageid bigint, + issueevent_timestamp bigint, + issueevent_type text, + issueevent_contextstring text, + issueevent_context text, + issueevent_payload text, + technicalinfo_type text, + technicalinfo_value text, + customissue_name text, + customissue_payload text, + pageclose bigint, + received_at bigint, + batch_order_number bigint +); From dab72fbb38b431486a302f8adcbde11cf8f4751f Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 10 Aug 2022 12:23:42 +0200 Subject: [PATCH 27/33] snowflakes working --- ee/connectors/db/tables.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ee/connectors/db/tables.py b/ee/connectors/db/tables.py index 58f57514c..f9e6eaaef 100644 --- a/ee/connectors/db/tables.py +++ b/ee/connectors/db/tables.py @@ -63,6 +63,11 @@ def create_tables_snowflake(db): db.engine.execute(q) print(f"`connector_sessions` table created succesfully.") + #with open(base_path / 'sql' / 'snowflake_events_detailed.sql') as f: + # q = f.read() + #db.engine.execute(q) + #print(f"`connector_user_events_detailed` table created succesfully.") + def create_tables_redshift(db): with open(base_path / 'sql' / 'redshift_events.sql') as f: From 7bfe78a29a2d090241f9385d6bb5ebdbe9fee7ae Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Fri, 19 Aug 2022 15:13:27 +0100 Subject: [PATCH 28/33] feat(assist): configurable LISTEN_PORT feat(chalice): configurable LISTEN_PORT feat(alerts): configurable LISTEN_PORT --- api/Dockerfile | 1 + api/Dockerfile.alerts | 1 + api/entrypoint.sh | 2 +- api/entrypoint_alerts.sh | 2 +- ee/api/Dockerfile | 1 + ee/api/Dockerfile.alerts | 1 + ee/api/entrypoint.sh | 2 +- ee/api/entrypoint_alerts.sh | 2 +- ee/utilities/Dockerfile | 3 ++- ee/utilities/server.js | 2 +- utilities/Dockerfile | 3 ++- utilities/server.js | 2 +- 12 files changed, 14 insertions(+), 8 deletions(-) diff --git a/api/Dockerfile b/api/Dockerfile index 5ee6af463..a7321ec58 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -8,6 +8,7 @@ ARG envarg # Startup daemon ENV SOURCE_MAP_VERSION=0.7.4 \ APP_NAME=chalice \ + LISTEN_PORT=8000 \ ENTERPRISE_BUILD=${envarg} ADD https://unpkg.com/source-map@${SOURCE_MAP_VERSION}/lib/mappings.wasm /mappings.wasm diff --git a/api/Dockerfile.alerts b/api/Dockerfile.alerts index fdc3a9a36..4595035c2 100644 --- a/api/Dockerfile.alerts +++ b/api/Dockerfile.alerts @@ -7,6 +7,7 @@ ARG envarg ENV APP_NAME=alerts \ pg_minconn=1 \ pg_maxconn=10 \ + LISTEN_PORT=8000 \ ENTERPRISE_BUILD=${envarg} COPY requirements.txt /work_tmp/requirements.txt diff --git a/api/entrypoint.sh b/api/entrypoint.sh index 94b56121a..68f24cbd9 100755 --- a/api/entrypoint.sh +++ b/api/entrypoint.sh @@ -2,4 +2,4 @@ cd sourcemap-reader nohup npm start &> /tmp/sourcemap-reader.log & cd .. -uvicorn app:app --host 0.0.0.0 --reload --proxy-headers +uvicorn app:app --host 0.0.0.0 --port $LISTEN_PORT --reload --proxy-headers diff --git a/api/entrypoint_alerts.sh b/api/entrypoint_alerts.sh index 861206589..4c9a66672 100755 --- a/api/entrypoint_alerts.sh +++ b/api/entrypoint_alerts.sh @@ -1,3 +1,3 @@ #!/bin/sh -uvicorn app:app --host 0.0.0.0 --reload +uvicorn app:app --host 0.0.0.0 --port $LISTEN_PORT --reload diff --git a/ee/api/Dockerfile b/ee/api/Dockerfile index 73aa4a6a4..c073684c6 100644 --- a/ee/api/Dockerfile +++ b/ee/api/Dockerfile @@ -6,6 +6,7 @@ RUN apk add --no-cache build-base libressl libffi-dev libressl-dev libxslt-dev l ARG envarg ENV SOURCE_MAP_VERSION=0.7.4 \ APP_NAME=chalice \ + LISTEN_PORT=8000 \ ENTERPRISE_BUILD=${envarg} ADD https://unpkg.com/source-map@${SOURCE_MAP_VERSION}/lib/mappings.wasm /mappings.wasm diff --git a/ee/api/Dockerfile.alerts b/ee/api/Dockerfile.alerts index 86a5915cd..3b16c0d7d 100644 --- a/ee/api/Dockerfile.alerts +++ b/ee/api/Dockerfile.alerts @@ -7,6 +7,7 @@ ARG envarg ENV APP_NAME=alerts \ pg_minconn=1 \ pg_maxconn=10 \ + LISTEN_PORT=8000 \ ENTERPRISE_BUILD=${envarg} COPY requirements-alerts.txt /work_tmp/requirements.txt diff --git a/ee/api/entrypoint.sh b/ee/api/entrypoint.sh index 724c7d6f1..fcb58b528 100755 --- a/ee/api/entrypoint.sh +++ b/ee/api/entrypoint.sh @@ -4,4 +4,4 @@ source /tmp/.env.override cd sourcemap-reader nohup npm start &> /tmp/sourcemap-reader.log & cd .. -uvicorn app:app --host 0.0.0.0 --reload --proxy-headers +uvicorn app:app --host 0.0.0.0 --port $LISTEN_PORT --reload --proxy-headers diff --git a/ee/api/entrypoint_alerts.sh b/ee/api/entrypoint_alerts.sh index 04e60ce1b..fa492d5b7 100755 --- a/ee/api/entrypoint_alerts.sh +++ b/ee/api/entrypoint_alerts.sh @@ -1,4 +1,4 @@ #!/bin/sh sh env_vars.sh source /tmp/.env.override -uvicorn app:app --host 0.0.0.0 --reload +uvicorn app:app --host 0.0.0.0 --port $LISTEN_PORT --reload diff --git a/ee/utilities/Dockerfile b/ee/utilities/Dockerfile index 2de6197a2..9b7b96388 100644 --- a/ee/utilities/Dockerfile +++ b/ee/utilities/Dockerfile @@ -5,7 +5,8 @@ RUN apk add --no-cache tini git libc6-compat && ln -s /lib/libc.musl-x86_64.so.1 ARG envarg ENV ENTERPRISE_BUILD=${envarg} \ - MAXMINDDB_FILE=/home/openreplay/geoip.mmdb + MAXMINDDB_FILE=/home/openreplay/geoip.mmdb \ + LISTEN_PORT=9001 WORKDIR /work COPY package.json . COPY package-lock.json . diff --git a/ee/utilities/server.js b/ee/utilities/server.js index 93d6d2a2e..13a89be79 100644 --- a/ee/utilities/server.js +++ b/ee/utilities/server.js @@ -9,7 +9,7 @@ if (process.env.redis === "true") { } const HOST = '0.0.0.0'; -const PORT = 9001; +const PORT = process.env.LISTEN_PORT || 9001; let debug = process.env.debug === "1" || false; const PREFIX = process.env.prefix || `/assist` diff --git a/utilities/Dockerfile b/utilities/Dockerfile index 2de6197a2..9b7b96388 100644 --- a/utilities/Dockerfile +++ b/utilities/Dockerfile @@ -5,7 +5,8 @@ RUN apk add --no-cache tini git libc6-compat && ln -s /lib/libc.musl-x86_64.so.1 ARG envarg ENV ENTERPRISE_BUILD=${envarg} \ - MAXMINDDB_FILE=/home/openreplay/geoip.mmdb + MAXMINDDB_FILE=/home/openreplay/geoip.mmdb \ + LISTEN_PORT=9001 WORKDIR /work COPY package.json . COPY package-lock.json . diff --git a/utilities/server.js b/utilities/server.js index ad03aafab..331d45a80 100644 --- a/utilities/server.js +++ b/utilities/server.js @@ -4,7 +4,7 @@ const socket = require("./servers/websocket"); const {request_logger} = require("./utils/helper"); const HOST = '0.0.0.0'; -const PORT = 9001; +const PORT = process.env.LISTEN_PORT || 9001; const wsapp = express(); wsapp.use(express.json()); From 29cdf9dccd24c25fd68b15dadb9fa9954da1412c Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Fri, 19 Aug 2022 15:32:27 +0100 Subject: [PATCH 29/33] feat(chalice): refactored assist configuration --- api/chalicelib/core/assist.py | 19 +++++++++++-------- api/env.default | 7 +++---- ee/api/env.default | 7 +++---- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/api/chalicelib/core/assist.py b/api/chalicelib/core/assist.py index 6fc8bcd90..39ae101c4 100644 --- a/api/chalicelib/core/assist.py +++ b/api/chalicelib/core/assist.py @@ -45,8 +45,8 @@ def get_live_sessions_ws(project_id, body: schemas.LiveSessionsSearchPayloadSche def __get_live_sessions_ws(project_id, data): project_key = projects.get_project_key(project_id) try: - connected_peers = requests.post(config("assist") % config("S3_KEY") + f"/{project_key}", json=data, - timeout=config("assistTimeout", cast=int, default=5)) + connected_peers = requests.post(config("ASSIST_URL") + config("assist") % config("S3_KEY") + f"/{project_key}", + json=data, timeout=config("assistTimeout", cast=int, default=5)) if connected_peers.status_code != 200: print("!! issue with the peer-server") print(connected_peers.text) @@ -76,8 +76,9 @@ def __get_live_sessions_ws(project_id, data): def get_live_session_by_id(project_id, session_id): project_key = projects.get_project_key(project_id) try: - connected_peers = requests.get(config("assist") % config("S3_KEY") + f"/{project_key}/{session_id}", - timeout=config("assistTimeout", cast=int, default=5)) + connected_peers = requests.get( + config("ASSIST_URL") + config("assist") % config("S3_KEY") + f"/{project_key}/{session_id}", + timeout=config("assistTimeout", cast=int, default=5)) if connected_peers.status_code != 200: print("!! issue with the peer-server") print(connected_peers.text) @@ -105,8 +106,9 @@ def is_live(project_id, session_id, project_key=None): if project_key is None: project_key = projects.get_project_key(project_id) try: - connected_peers = requests.get(config("assistList") % config("S3_KEY") + f"/{project_key}/{session_id}", - timeout=config("assistTimeout", cast=int, default=5)) + connected_peers = requests.get( + config("ASSIST_URL") + config("assistList") % config("S3_KEY") + f"/{project_key}/{session_id}", + timeout=config("assistTimeout", cast=int, default=5)) if connected_peers.status_code != 200: print("!! issue with the peer-server") print(connected_peers.text) @@ -133,8 +135,9 @@ def autocomplete(project_id, q: str, key: str = None): if key: params["key"] = key try: - results = requests.get(config("assistList") % config("S3_KEY") + f"/{project_key}/autocomplete", - params=params, timeout=config("assistTimeout", cast=int, default=5)) + results = requests.get( + config("ASSIST_URL") + config("assistList") % config("S3_KEY") + f"/{project_key}/autocomplete", + params=params, timeout=config("assistTimeout", cast=int, default=5)) if results.status_code != 200: print("!! issue with the peer-server") print(results.text) diff --git a/api/env.default b/api/env.default index 01ad50606..7053e2b13 100644 --- a/api/env.default +++ b/api/env.default @@ -11,9 +11,7 @@ S3_HOST= S3_KEY= S3_SECRET= SITE_URL= -alert_ntf=http://127.0.0.1:8000/async/alerts/notifications/%s announcement_url= -assign_link=http://127.0.0.1:8000/async/email_assignment async_Token= captcha_key= captcha_server= @@ -28,8 +26,9 @@ jwt_algorithm=HS512 jwt_exp_delta_seconds=2592000 jwt_issuer=openreplay-default-foss jwt_secret="SET A RANDOM STRING HERE" -assist=http://assist-openreplay.app.svc.cluster.local:9001/assist/%s/sockets-live -assistList=http://assist-openreplay.app.svc.cluster.local:9001/assist/%s/sockets-list +ASSIST_URL=http://assist-openreplay.app.svc.cluster.local:9001 +assist=/assist/%s/sockets-live +assistList=/assist/%s/sockets-list pg_dbname=postgres pg_host=postgresql.db.svc.cluster.local pg_password=asayerPostgres diff --git a/ee/api/env.default b/ee/api/env.default index 65f55a03c..ef04bbc3b 100644 --- a/ee/api/env.default +++ b/ee/api/env.default @@ -13,9 +13,7 @@ S3_KEY= S3_SECRET= SAML2_MD_URL= SITE_URL= -alert_ntf=http://127.0.0.1:8000/async/alerts/notifications/%s announcement_url= -assign_link=http://127.0.0.1:8000/async/email_assignment async_Token= captcha_key= captcha_server= @@ -37,8 +35,9 @@ jwt_algorithm=HS512 jwt_exp_delta_seconds=2592000 jwt_issuer=openreplay-default-ee jwt_secret="SET A RANDOM STRING HERE" -assist=http://assist-openreplay.app.svc.cluster.local:9001/assist/%s/sockets-live -assistList=http://assist-openreplay.app.svc.cluster.local:9001/assist/%s/sockets-list +ASSIST_URL=http://assist-openreplay.app.svc.cluster.local:9001 +assist=/assist/%s/sockets-live +assistList=/assist/%s/sockets-list pg_dbname=postgres pg_host=postgresql.db.svc.cluster.local pg_password=asayerPostgres From 41aeaa83ee0b6af7a1c72538f0389d5e4a47bcc9 Mon Sep 17 00:00:00 2001 From: Fernando Doglio Date: Tue, 23 Aug 2022 16:03:25 +0200 Subject: [PATCH 30/33] :broom: adding link to our youtube channel to the README file --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 31252da7d..71e419646 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ Please refer to the [official OpenReplay documentation](https://docs.openreplay. - [Slack](https://slack.openreplay.com) (Connect with our engineers and community) - [GitHub](https://github.com/openreplay/openreplay/issues) (Bug and issue reports) - [Twitter](https://twitter.com/OpenReplayHQ) (Product updates, Great content) +- [YouTube](https://www.youtube.com/channel/UCcnWlW-5wEuuPAwjTR1Ydxw) (How-to tutorials, past Community Calls) - [Website chat](https://openreplay.com) (Talk to us) ## Contributing From 3381e3a081cb132021cdabc56c0020a3cc90bb75 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Wed, 24 Aug 2022 14:55:10 +0200 Subject: [PATCH 31/33] New message with batchsize --- ee/connectors/consumer.py | 2 +- ee/connectors/msgcodec/codec.py | 757 ----------------------------- ee/connectors/msgcodec/messages.py | 251 +++++----- ee/connectors/msgcodec/msgcodec.py | 739 ++++++++++++++++++++++++++++ 4 files changed, 858 insertions(+), 891 deletions(-) create mode 100644 ee/connectors/msgcodec/msgcodec.py diff --git a/ee/connectors/consumer.py b/ee/connectors/consumer.py index acf55f2d5..99beb605e 100644 --- a/ee/connectors/consumer.py +++ b/ee/connectors/consumer.py @@ -3,7 +3,7 @@ from kafka import KafkaConsumer from datetime import datetime from collections import defaultdict -from msgcodec.codec import MessageCodec +from msgcodec.msgcodec import MessageCodec from msgcodec.messages import SessionEnd from db.api import DBConnection from db.models import events_detailed_table_name, events_table_name, sessions_table_name diff --git a/ee/connectors/msgcodec/codec.py b/ee/connectors/msgcodec/codec.py index 7e1aabbf9..5aeb0e4ed 100644 --- a/ee/connectors/msgcodec/codec.py +++ b/ee/connectors/msgcodec/codec.py @@ -1,7 +1,4 @@ import io -from typing import List -from msgcodec.messages import * - class Codec: """ @@ -24,8 +21,6 @@ class Codec: i = 0 # n of byte (max 9 for uint64) while True: b = reader.read(1) - if len(b) == 0: - raise IndexError('bytes out of range') num = int.from_bytes(b, "big", signed=False) # print(i, x) @@ -65,755 +60,3 @@ class Codec: return s.decode("utf-8", errors="replace").replace("\x00", "\uFFFD") except UnicodeDecodeError: return None - - -class MessageCodec(Codec): - - def encode(self, m: Message) -> bytes: - ... - - def decode(self, b: bytes) -> Message: - reader = io.BytesIO(b) - return self.read_head_message(reader) - - def decode_detailed(self, b: bytes) -> List[Message]: - reader = io.BytesIO(b) - messages_list = list() - while True: - try: - messages_list.append(self.read_head_message(reader)) - except IndexError: - break - return messages_list - - def read_head_message(self, reader: io.BytesIO) -> Message: - message_id = self.read_message_id(reader) - if message_id == 0: - return Timestamp( - timestamp=self.read_uint(reader) - ) - if message_id == 1: - return SessionStart( - timestamp=self.read_uint(reader), - project_id=self.read_uint(reader), - tracker_version=self.read_string(reader), - rev_id=self.read_string(reader), - user_uuid=self.read_string(reader), - user_agent=self.read_string(reader), - user_os=self.read_string(reader), - user_os_version=self.read_string(reader), - user_browser=self.read_string(reader), - user_browser_version=self.read_string(reader), - user_device=self.read_string(reader), - user_device_type=self.read_string(reader), - user_device_memory_size=self.read_uint(reader), - user_device_heap_size=self.read_uint(reader), - user_country=self.read_string(reader) - ) - - if message_id == 2: - return SessionDisconnect( - timestamp=self.read_uint(reader) - ) - - if message_id == 3: - return SessionEnd( - timestamp=self.read_uint(reader) - ) - - if message_id == 4: - return SetPageLocation( - url=self.read_string(reader), - referrer=self.read_string(reader), - navigation_start=self.read_uint(reader) - ) - - if message_id == 5: - return SetViewportSize( - width=self.read_uint(reader), - height=self.read_uint(reader) - ) - - if message_id == 6: - return SetViewportScroll( - x=self.read_int(reader), - y=self.read_int(reader) - ) - - if message_id == 7: - return CreateDocument() - - if message_id == 8: - return CreateElementNode( - id=self.read_uint(reader), - parent_id=self.read_uint(reader), - index=self.read_uint(reader), - tag=self.read_string(reader), - svg=self.read_boolean(reader), - ) - - if message_id == 9: - return CreateTextNode( - id=self.read_uint(reader), - parent_id=self.read_uint(reader), - index=self.read_uint(reader) - ) - - if message_id == 10: - return MoveNode( - id=self.read_uint(reader), - parent_id=self.read_uint(reader), - index=self.read_uint(reader) - ) - - if message_id == 11: - return RemoveNode( - id=self.read_uint(reader) - ) - - if message_id == 12: - return SetNodeAttribute( - id=self.read_uint(reader), - name=self.read_string(reader), - value=self.read_string(reader) - ) - - if message_id == 13: - return RemoveNodeAttribute( - id=self.read_uint(reader), - name=self.read_string(reader) - ) - - if message_id == 14: - return SetNodeData( - id=self.read_uint(reader), - data=self.read_string(reader) - ) - - if message_id == 15: - return SetCSSData( - id=self.read_uint(reader), - data=self.read_string(reader) - ) - - if message_id == 16: - return SetNodeScroll( - id=self.read_uint(reader), - x=self.read_int(reader), - y=self.read_int(reader), - ) - - if message_id == 17: - return SetInputTarget( - id=self.read_uint(reader), - label=self.read_string(reader) - ) - - if message_id == 18: - return SetInputValue( - id=self.read_uint(reader), - value=self.read_string(reader), - mask=self.read_int(reader), - ) - - if message_id == 19: - return SetInputChecked( - id=self.read_uint(reader), - checked=self.read_boolean(reader) - ) - - if message_id == 20: - return MouseMove( - x=self.read_uint(reader), - y=self.read_uint(reader) - ) - - if message_id == 21: - return MouseClickDepricated( - id=self.read_uint(reader), - hesitation_time=self.read_uint(reader), - label=self.read_string(reader) - ) - - if message_id == 22: - return ConsoleLog( - level=self.read_string(reader), - value=self.read_string(reader) - ) - - if message_id == 23: - return PageLoadTiming( - request_start=self.read_uint(reader), - response_start=self.read_uint(reader), - response_end=self.read_uint(reader), - dom_content_loaded_event_start=self.read_uint(reader), - dom_content_loaded_event_end=self.read_uint(reader), - load_event_start=self.read_uint(reader), - load_event_end=self.read_uint(reader), - first_paint=self.read_uint(reader), - first_contentful_paint=self.read_uint(reader) - ) - - if message_id == 24: - return PageRenderTiming( - speed_index=self.read_uint(reader), - visually_complete=self.read_uint(reader), - time_to_interactive=self.read_uint(reader), - ) - - if message_id == 25: - return JSException( - name=self.read_string(reader), - message=self.read_string(reader), - payload=self.read_string(reader) - ) - - if message_id == 26: - return RawErrorEvent( - timestamp=self.read_uint(reader), - source=self.read_string(reader), - name=self.read_string(reader), - message=self.read_string(reader), - payload=self.read_string(reader) - ) - - if message_id == 27: - return RawCustomEvent( - name=self.read_string(reader), - payload=self.read_string(reader) - ) - - if message_id == 28: - return UserID( - id=self.read_string(reader) - ) - - if message_id == 29: - return UserAnonymousID( - id=self.read_string(reader) - ) - - if message_id == 30: - return Metadata( - key=self.read_string(reader), - value=self.read_string(reader) - ) - - if message_id == 31: - return PageEvent( - message_id=self.read_uint(reader), - timestamp=self.read_uint(reader), - url=self.read_string(reader), - referrer=self.read_string(reader), - loaded=self.read_boolean(reader), - request_start=self.read_uint(reader), - response_start=self.read_uint(reader), - response_end=self.read_uint(reader), - dom_content_loaded_event_start=self.read_uint(reader), - dom_content_loaded_event_end=self.read_uint(reader), - load_event_start=self.read_uint(reader), - load_event_end=self.read_uint(reader), - first_paint=self.read_uint(reader), - first_contentful_paint=self.read_uint(reader), - speed_index=self.read_uint(reader), - visually_complete=self.read_uint(reader), - time_to_interactive=self.read_uint(reader) - ) - - if message_id == 32: - return InputEvent( - message_id=self.read_uint(reader), - timestamp=self.read_uint(reader), - value=self.read_string(reader), - value_masked=self.read_boolean(reader), - label=self.read_string(reader), - ) - - if message_id == 33: - return ClickEvent( - message_id=self.read_uint(reader), - timestamp=self.read_uint(reader), - hesitation_time=self.read_uint(reader), - label=self.read_string(reader) - ) - - if message_id == 34: - return ErrorEvent( - message_id=self.read_uint(reader), - timestamp=self.read_uint(reader), - source=self.read_string(reader), - name=self.read_string(reader), - message=self.read_string(reader), - payload=self.read_string(reader) - ) - - if message_id == 35: - - message_id = self.read_uint(reader) - ts = self.read_uint(reader) - if ts > 9999999999999: - ts = None - return ResourceEvent( - message_id=message_id, - timestamp=ts, - duration=self.read_uint(reader), - ttfb=self.read_uint(reader), - header_size=self.read_uint(reader), - encoded_body_size=self.read_uint(reader), - decoded_body_size=self.read_uint(reader), - url=self.read_string(reader), - type=self.read_string(reader), - success=self.read_boolean(reader), - method=self.read_string(reader), - status=self.read_uint(reader) - ) - - if message_id == 36: - return CustomEvent( - message_id=self.read_uint(reader), - timestamp=self.read_uint(reader), - name=self.read_string(reader), - payload=self.read_string(reader) - ) - - if message_id == 37: - return CSSInsertRule( - id=self.read_uint(reader), - rule=self.read_string(reader), - index=self.read_uint(reader) - ) - - if message_id == 38: - return CSSDeleteRule( - id=self.read_uint(reader), - index=self.read_uint(reader) - ) - - if message_id == 39: - return Fetch( - method=self.read_string(reader), - url=self.read_string(reader), - request=self.read_string(reader), - response=self.read_string(reader), - status=self.read_uint(reader), - timestamp=self.read_uint(reader), - duration=self.read_uint(reader) - ) - - if message_id == 40: - return Profiler( - name=self.read_string(reader), - duration=self.read_uint(reader), - args=self.read_string(reader), - result=self.read_string(reader) - ) - - if message_id == 41: - return OTable( - key=self.read_string(reader), - value=self.read_string(reader) - ) - - if message_id == 42: - return StateAction( - type=self.read_string(reader) - ) - - if message_id == 43: - return StateActionEvent( - message_id=self.read_uint(reader), - timestamp=self.read_uint(reader), - type=self.read_string(reader) - ) - - if message_id == 44: - return Redux( - action=self.read_string(reader), - state=self.read_string(reader), - duration=self.read_uint(reader) - ) - - if message_id == 45: - return Vuex( - mutation=self.read_string(reader), - state=self.read_string(reader), - ) - - if message_id == 46: - return MobX( - type=self.read_string(reader), - payload=self.read_string(reader), - ) - - if message_id == 47: - return NgRx( - action=self.read_string(reader), - state=self.read_string(reader), - duration=self.read_uint(reader) - ) - - if message_id == 48: - return GraphQL( - operation_kind=self.read_string(reader), - operation_name=self.read_string(reader), - variables=self.read_string(reader), - response=self.read_string(reader) - ) - - if message_id == 49: - return PerformanceTrack( - frames=self.read_int(reader), - ticks=self.read_int(reader), - total_js_heap_size=self.read_uint(reader), - used_js_heap_size=self.read_uint(reader) - ) - - if message_id == 50: - return GraphQLEvent( - message_id=self.read_uint(reader), - timestamp=self.read_uint(reader), - name=self.read_string(reader) - ) - - if message_id == 51: - return FetchEvent( - message_id=self.read_uint(reader), - timestamp=self.read_uint(reader), - method=self.read_string(reader), - url=self.read_string(reader), - request=self.read_string(reader), - response=self.read_string(reader), - status=self.read_uint(reader), - duration=self.read_uint(reader) - ) - - if message_id == 52: - return DomDrop( - timestamp=self.read_uint(reader) - ) - - if message_id == 53: - return ResourceTiming( - timestamp=self.read_uint(reader), - duration=self.read_uint(reader), - ttfb=self.read_uint(reader), - header_size=self.read_uint(reader), - encoded_body_size=self.read_uint(reader), - decoded_body_size=self.read_uint(reader), - url=self.read_string(reader), - initiator=self.read_string(reader) - ) - - if message_id == 54: - return ConnectionInformation( - downlink=self.read_uint(reader), - type=self.read_string(reader) - ) - - if message_id == 55: - return SetPageVisibility( - hidden=self.read_boolean(reader) - ) - - if message_id == 56: - return PerformanceTrackAggr( - timestamp_start=self.read_uint(reader), - timestamp_end=self.read_uint(reader), - min_fps=self.read_uint(reader), - avg_fps=self.read_uint(reader), - max_fps=self.read_uint(reader), - min_cpu=self.read_uint(reader), - avg_cpu=self.read_uint(reader), - max_cpu=self.read_uint(reader), - min_total_js_heap_size=self.read_uint(reader), - avg_total_js_heap_size=self.read_uint(reader), - max_total_js_heap_size=self.read_uint(reader), - min_used_js_heap_size=self.read_uint(reader), - avg_used_js_heap_size=self.read_uint(reader), - max_used_js_heap_size=self.read_uint(reader) - ) - - if message_id == 59: - return LongTask( - timestamp=self.read_uint(reader), - duration=self.read_uint(reader), - context=self.read_uint(reader), - container_type=self.read_uint(reader), - container_src=self.read_string(reader), - container_id=self.read_string(reader), - container_name=self.read_string(reader) - ) - - if message_id == 60: - return SetNodeURLBasedAttribute( - id=self.read_uint(reader), - name=self.read_string(reader), - value=self.read_string(reader), - base_url=self.read_string(reader) - ) - - if message_id == 61: - return SetStyleData( - id=self.read_uint(reader), - data=self.read_string(reader), - base_url=self.read_string(reader) - ) - - if message_id == 62: - return IssueEvent( - message_id=self.read_uint(reader), - timestamp=self.read_uint(reader), - type=self.read_string(reader), - context_string=self.read_string(reader), - context=self.read_string(reader), - payload=self.read_string(reader) - ) - - if message_id == 63: - return TechnicalInfo( - type=self.read_string(reader), - value=self.read_string(reader) - ) - - if message_id == 64: - return CustomIssue( - name=self.read_string(reader), - payload=self.read_string(reader) - ) - - if message_id == 65: - return PageClose() - - if message_id == 66: - return AssetCache( - url=self.read_string(reader) - ) - - if message_id == 67: - return CSSInsertRuleURLBased( - id=self.read_uint(reader), - rule=self.read_string(reader), - index=self.read_uint(reader), - base_url=self.read_string(reader) - ) - - if message_id == 69: - return MouseClick( - id=self.read_uint(reader), - hesitation_time=self.read_uint(reader), - label=self.read_string(reader), - selector=self.read_string(reader) - ) - - if message_id == 70: - return CreateIFrameDocument( - frame_id=self.read_uint(reader), - id=self.read_uint(reader) - ) - - if message_id == 80: - return BatchMeta( - page_no=self.read_uint(reader), - first_index=self.read_uint(reader), - timestamp=self.read_int(reader) - ) - - if message_id == 90: - return IOSSessionStart( - timestamp=self.read_uint(reader), - project_id=self.read_uint(reader), - tracker_version=self.read_string(reader), - rev_id=self.read_string(reader), - user_uuid=self.read_string(reader), - user_os=self.read_string(reader), - user_os_version=self.read_string(reader), - user_device=self.read_string(reader), - user_device_type=self.read_string(reader), - user_country=self.read_string(reader) - ) - - if message_id == 91: - return IOSSessionEnd( - timestamp=self.read_uint(reader) - ) - - if message_id == 92: - return IOSMetadata( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - key=self.read_string(reader), - value=self.read_string(reader) - ) - - if message_id == 93: - return IOSCustomEvent( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - name=self.read_string(reader), - payload=self.read_string(reader) - ) - - if message_id == 94: - return IOSUserID( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - value=self.read_string(reader) - ) - - if message_id == 95: - return IOSUserAnonymousID( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - value=self.read_string(reader) - ) - - if message_id == 96: - return IOSScreenChanges( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - x=self.read_uint(reader), - y=self.read_uint(reader), - width=self.read_uint(reader), - height=self.read_uint(reader) - ) - - if message_id == 97: - return IOSCrash( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - name=self.read_string(reader), - reason=self.read_string(reader), - stacktrace=self.read_string(reader) - ) - - if message_id == 98: - return IOSScreenEnter( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - title=self.read_string(reader), - view_name=self.read_string(reader) - ) - - if message_id == 99: - return IOSScreenLeave( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - title=self.read_string(reader), - view_name=self.read_string(reader) - ) - - if message_id == 100: - return IOSClickEvent( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - label=self.read_string(reader), - x=self.read_uint(reader), - y=self.read_uint(reader) - ) - - if message_id == 101: - return IOSInputEvent( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - value=self.read_string(reader), - value_masked=self.read_boolean(reader), - label=self.read_string(reader) - ) - - if message_id == 102: - return IOSPreformanceEvent( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - name=self.read_string(reader), - value=self.read_uint(reader) - ) - - if message_id == 103: - return IOSLog( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - severity=self.read_string(reader), - content=self.read_string(reader) - ) - - if message_id == 104: - return IOSInternalError( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - content=self.read_string(reader) - ) - - if message_id == 105: - return IOSNetworkCall( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - duration=self.read_uint(reader), - headers=self.read_string(reader), - body=self.read_string(reader), - url=self.read_string(reader), - success=self.read_boolean(reader), - method=self.read_string(reader), - status=self.read_uint(reader) - ) - - if message_id == 107: - return IOSBatchMeta( - timestamp=self.read_uint(reader), - length=self.read_uint(reader), - first_index=self.read_uint(reader) - ) - - if message_id == 110: - return IOSPerformanceAggregated( - timestamp_start=self.read_uint(reader), - timestamp_end=self.read_uint(reader), - min_fps=self.read_uint(reader), - avg_fps=self.read_uint(reader), - max_fps=self.read_uint(reader), - min_cpu=self.read_uint(reader), - avg_cpu=self.read_uint(reader), - max_cpu=self.read_uint(reader), - min_memory=self.read_uint(reader), - avg_memory=self.read_uint(reader), - max_memory=self.read_uint(reader), - min_battery=self.read_uint(reader), - avg_battery=self.read_uint(reader), - max_battery=self.read_uint(reader) - ) - if message_id == 111: - return IOSIssueEvent( - timestamp=self.read_uint(reader), - type=self.read_string(reader), - context_string=self.read_string(reader), - context=self.read_string(reader), - payload=self.read_string(reader) - ) - - def read_message_id(self, reader: io.BytesIO) -> int: - """ - Read and return the first byte where the message id is encoded - """ - id_ = self.read_uint(reader) - return id_ - - @staticmethod - def check_message_id(b: bytes) -> int: - """ - todo: make it static and without reader. It's just the first byte - Read and return the first byte where the message id is encoded - """ - reader = io.BytesIO(b) - id_ = Codec.read_uint(reader) - - return id_ - - @staticmethod - def decode_key(b) -> int: - """ - Decode the message key (encoded with little endian) - """ - try: - decoded = int.from_bytes(b, "little", signed=False) - except Exception as e: - raise UnicodeDecodeError(f"Error while decoding message key (SessionID) from {b}\n{e}") - return decoded diff --git a/ee/connectors/msgcodec/messages.py b/ee/connectors/msgcodec/messages.py index bc451b287..83f166ed8 100644 --- a/ee/connectors/msgcodec/messages.py +++ b/ee/connectors/msgcodec/messages.py @@ -1,13 +1,39 @@ -""" -Representations of Kafka messages -""" -from abc import ABC +# Auto-generated, do not edit +from abc import ABC class Message(ABC): pass +class BatchMeta(Message): + __id__ = 80 + + def __init__(self, page_no, first_index, timestamp): + self.page_no = page_no + self.first_index = first_index + self.timestamp = timestamp + + +class BatchMetadata(Message): + __id__ = 81 + + def __init__(self, version, page_no, first_index, timestamp, location): + self.version = version + self.page_no = page_no + self.first_index = first_index + self.timestamp = timestamp + self.location = location + + +class PartitionedMessage(Message): + __id__ = 82 + + def __init__(self, part_no, part_total): + self.part_no = part_no + self.part_total = part_total + + class Timestamp(Message): __id__ = 0 @@ -18,10 +44,7 @@ class Timestamp(Message): class SessionStart(Message): __id__ = 1 - def __init__(self, timestamp, project_id, tracker_version, rev_id, user_uuid, - user_agent, user_os, user_os_version, user_browser, user_browser_version, - user_device, user_device_type, user_device_memory_size, user_device_heap_size, - user_country): + def __init__(self, timestamp, project_id, tracker_version, rev_id, user_uuid, user_agent, user_os, user_os_version, user_browser, user_browser_version, user_device, user_device_type, user_device_memory_size, user_device_heap_size, user_country, user_id): self.timestamp = timestamp self.project_id = project_id self.tracker_version = tracker_version @@ -37,6 +60,7 @@ class SessionStart(Message): self.user_device_memory_size = user_device_memory_size self.user_device_heap_size = user_device_heap_size self.user_country = user_country + self.user_id = user_id class SessionDisconnect(Message): @@ -48,7 +72,6 @@ class SessionDisconnect(Message): class SessionEnd(Message): __id__ = 3 - __name__ = 'SessionEnd' def __init__(self, timestamp): self.timestamp = timestamp @@ -82,13 +105,17 @@ class SetViewportScroll(Message): class CreateDocument(Message): __id__ = 7 + def __init__(self, ): + pass + + class CreateElementNode(Message): __id__ = 8 def __init__(self, id, parent_id, index, tag, svg): self.id = id - self.parent_id = parent_id, + self.parent_id = parent_id self.index = index self.tag = tag self.svg = svg @@ -122,7 +149,7 @@ class RemoveNode(Message): class SetNodeAttribute(Message): __id__ = 12 - def __init__(self, id, name: str, value: str): + def __init__(self, id, name, value): self.id = id self.name = name self.value = value @@ -131,7 +158,7 @@ class SetNodeAttribute(Message): class RemoveNodeAttribute(Message): __id__ = 13 - def __init__(self, id, name: str): + def __init__(self, id, name): self.id = id self.name = name @@ -139,7 +166,7 @@ class RemoveNodeAttribute(Message): class SetNodeData(Message): __id__ = 14 - def __init__(self, id, data: str): + def __init__(self, id, data): self.id = id self.data = data @@ -147,7 +174,7 @@ class SetNodeData(Message): class SetCSSData(Message): __id__ = 15 - def __init__(self, id, data: str): + def __init__(self, id, data): self.id = id self.data = data @@ -155,7 +182,7 @@ class SetCSSData(Message): class SetNodeScroll(Message): __id__ = 16 - def __init__(self, id, x: int, y: int): + def __init__(self, id, x, y): self.id = id self.x = x self.y = y @@ -164,7 +191,7 @@ class SetNodeScroll(Message): class SetInputTarget(Message): __id__ = 17 - def __init__(self, id, label: str): + def __init__(self, id, label): self.id = id self.label = label @@ -172,7 +199,7 @@ class SetInputTarget(Message): class SetInputValue(Message): __id__ = 18 - def __init__(self, id, value: str, mask: int): + def __init__(self, id, value, mask): self.id = id self.value = value self.mask = mask @@ -181,7 +208,7 @@ class SetInputValue(Message): class SetInputChecked(Message): __id__ = 19 - def __init__(self, id, checked: bool): + def __init__(self, id, checked): self.id = id self.checked = checked @@ -197,7 +224,7 @@ class MouseMove(Message): class MouseClickDepricated(Message): __id__ = 21 - def __init__(self, id, hesitation_time, label: str): + def __init__(self, id, hesitation_time, label): self.id = id self.hesitation_time = hesitation_time self.label = label @@ -206,7 +233,7 @@ class MouseClickDepricated(Message): class ConsoleLog(Message): __id__ = 22 - def __init__(self, level: str, value: str): + def __init__(self, level, value): self.level = level self.value = value @@ -214,9 +241,7 @@ class ConsoleLog(Message): class PageLoadTiming(Message): __id__ = 23 - def __init__(self, request_start, response_start, response_end, dom_content_loaded_event_start, - dom_content_loaded_event_end, load_event_start, load_event_end, - first_paint, first_contentful_paint): + def __init__(self, request_start, response_start, response_end, dom_content_loaded_event_start, dom_content_loaded_event_end, load_event_start, load_event_end, first_paint, first_contentful_paint): self.request_start = request_start self.response_start = response_start self.response_end = response_end @@ -236,20 +261,20 @@ class PageRenderTiming(Message): self.visually_complete = visually_complete self.time_to_interactive = time_to_interactive + class JSException(Message): __id__ = 25 - def __init__(self, name: str, message: str, payload: str): + def __init__(self, name, message, payload): self.name = name self.message = message self.payload = payload -class RawErrorEvent(Message): +class IntegrationEvent(Message): __id__ = 26 - def __init__(self, timestamp, source: str, name: str, message: str, - payload: str): + def __init__(self, timestamp, source, name, message, payload): self.timestamp = timestamp self.source = source self.name = name @@ -260,7 +285,7 @@ class RawErrorEvent(Message): class RawCustomEvent(Message): __id__ = 27 - def __init__(self, name: str, payload: str): + def __init__(self, name, payload): self.name = name self.payload = payload @@ -268,44 +293,29 @@ class RawCustomEvent(Message): class UserID(Message): __id__ = 28 - def __init__(self, id: str): + def __init__(self, id): self.id = id class UserAnonymousID(Message): __id__ = 29 - def __init__(self, id: str): + def __init__(self, id): self.id = id class Metadata(Message): __id__ = 30 - def __init__(self, key: str, value: str): + def __init__(self, key, value): self.key = key self.value = value -class PerformanceTrack(Message): - __id__ = 49 - - def __init__(self, frames: int, ticks: int, total_js_heap_size, - used_js_heap_size): - self.frames = frames - self.ticks = ticks - self.total_js_heap_size = total_js_heap_size - self.used_js_heap_size = used_js_heap_size - - class PageEvent(Message): __id__ = 31 - def __init__(self, message_id, timestamp, url: str, referrer: str, - loaded: bool, request_start, response_start, response_end, - dom_content_loaded_event_start, dom_content_loaded_event_end, - load_event_start, load_event_end, first_paint, first_contentful_paint, - speed_index, visually_complete, time_to_interactive): + def __init__(self, message_id, timestamp, url, referrer, loaded, request_start, response_start, response_end, dom_content_loaded_event_start, dom_content_loaded_event_end, load_event_start, load_event_end, first_paint, first_contentful_paint, speed_index, visually_complete, time_to_interactive): self.message_id = message_id self.timestamp = timestamp self.url = url @@ -328,7 +338,7 @@ class PageEvent(Message): class InputEvent(Message): __id__ = 32 - def __init__(self, message_id, timestamp, value: str, value_masked: bool, label: str): + def __init__(self, message_id, timestamp, value, value_masked, label): self.message_id = message_id self.timestamp = timestamp self.value = value @@ -339,18 +349,18 @@ class InputEvent(Message): class ClickEvent(Message): __id__ = 33 - def __init__(self, message_id, timestamp, hesitation_time, label: str): + def __init__(self, message_id, timestamp, hesitation_time, label, selector): self.message_id = message_id self.timestamp = timestamp self.hesitation_time = hesitation_time self.label = label + self.selector = selector class ErrorEvent(Message): __id__ = 34 - def __init__(self, message_id, timestamp, source: str, name: str, message: str, - payload: str): + def __init__(self, message_id, timestamp, source, name, message, payload): self.message_id = message_id self.timestamp = timestamp self.source = source @@ -362,8 +372,7 @@ class ErrorEvent(Message): class ResourceEvent(Message): __id__ = 35 - def __init__(self, message_id, timestamp, duration, ttfb, header_size, encoded_body_size, - decoded_body_size, url: str, type: str, success: bool, method: str, status): + def __init__(self, message_id, timestamp, duration, ttfb, header_size, encoded_body_size, decoded_body_size, url, type, success, method, status): self.message_id = message_id self.timestamp = timestamp self.duration = duration @@ -381,7 +390,7 @@ class ResourceEvent(Message): class CustomEvent(Message): __id__ = 36 - def __init__(self, message_id, timestamp, name: str, payload: str): + def __init__(self, message_id, timestamp, name, payload): self.message_id = message_id self.timestamp = timestamp self.name = name @@ -391,7 +400,7 @@ class CustomEvent(Message): class CSSInsertRule(Message): __id__ = 37 - def __init__(self, id, rule: str, index): + def __init__(self, id, rule, index): self.id = id self.rule = rule self.index = index @@ -408,8 +417,7 @@ class CSSDeleteRule(Message): class Fetch(Message): __id__ = 39 - def __init__(self, method: str, url: str, request: str, response: str, status, - timestamp, duration): + def __init__(self, method, url, request, response, status, timestamp, duration): self.method = method self.url = url self.request = request @@ -422,7 +430,7 @@ class Fetch(Message): class Profiler(Message): __id__ = 40 - def __init__(self, name: str, duration, args: str, result: str): + def __init__(self, name, duration, args, result): self.name = name self.duration = duration self.args = args @@ -432,7 +440,7 @@ class Profiler(Message): class OTable(Message): __id__ = 41 - def __init__(self, key: str, value: str): + def __init__(self, key, value): self.key = key self.value = value @@ -440,14 +448,14 @@ class OTable(Message): class StateAction(Message): __id__ = 42 - def __init__(self, type: str): + def __init__(self, type): self.type = type class StateActionEvent(Message): __id__ = 43 - def __init__(self, message_id, timestamp, type: str): + def __init__(self, message_id, timestamp, type): self.message_id = message_id self.timestamp = timestamp self.type = type @@ -456,7 +464,7 @@ class StateActionEvent(Message): class Redux(Message): __id__ = 44 - def __init__(self, action: str, state: str, duration): + def __init__(self, action, state, duration): self.action = action self.state = state self.duration = duration @@ -465,7 +473,7 @@ class Redux(Message): class Vuex(Message): __id__ = 45 - def __init__(self, mutation: str, state: str): + def __init__(self, mutation, state): self.mutation = mutation self.state = state @@ -473,7 +481,7 @@ class Vuex(Message): class MobX(Message): __id__ = 46 - def __init__(self, type: str, payload: str): + def __init__(self, type, payload): self.type = type self.payload = payload @@ -481,7 +489,7 @@ class MobX(Message): class NgRx(Message): __id__ = 47 - def __init__(self, action: str, state: str, duration): + def __init__(self, action, state, duration): self.action = action self.state = state self.duration = duration @@ -490,8 +498,7 @@ class NgRx(Message): class GraphQL(Message): __id__ = 48 - def __init__(self, operation_kind: str, operation_name: str, - variables: str, response: str): + def __init__(self, operation_kind, operation_name, variables, response): self.operation_kind = operation_kind self.operation_name = operation_name self.variables = variables @@ -501,8 +508,7 @@ class GraphQL(Message): class PerformanceTrack(Message): __id__ = 49 - def __init__(self, frames: int, ticks: int, - total_js_heap_size, used_js_heap_size): + def __init__(self, frames, ticks, total_js_heap_size, used_js_heap_size): self.frames = frames self.ticks = ticks self.total_js_heap_size = total_js_heap_size @@ -512,17 +518,19 @@ class PerformanceTrack(Message): class GraphQLEvent(Message): __id__ = 50 - def __init__(self, message_id, timestamp, name: str): + def __init__(self, message_id, timestamp, operation_kind, operation_name, variables, response): self.message_id = message_id self.timestamp = timestamp - self.name = name + self.operation_kind = operation_kind + self.operation_name = operation_name + self.variables = variables + self.response = response class FetchEvent(Message): __id__ = 51 - def __init__(self, message_id, timestamp, method: str, url, request, response: str, - status, duration): + def __init__(self, message_id, timestamp, method, url, request, response, status, duration): self.message_id = message_id self.timestamp = timestamp self.method = method @@ -533,7 +541,7 @@ class FetchEvent(Message): self.duration = duration -class DomDrop(Message): +class DOMDrop(Message): __id__ = 52 def __init__(self, timestamp): @@ -543,8 +551,7 @@ class DomDrop(Message): class ResourceTiming(Message): __id__ = 53 - def __init__(self, timestamp, duration, ttfb, header_size, encoded_body_size, - decoded_body_size, url, initiator): + def __init__(self, timestamp, duration, ttfb, header_size, encoded_body_size, decoded_body_size, url, initiator): self.timestamp = timestamp self.duration = duration self.ttfb = ttfb @@ -558,7 +565,7 @@ class ResourceTiming(Message): class ConnectionInformation(Message): __id__ = 54 - def __init__(self, downlink, type: str): + def __init__(self, downlink, type): self.downlink = downlink self.type = type @@ -566,19 +573,14 @@ class ConnectionInformation(Message): class SetPageVisibility(Message): __id__ = 55 - def __init__(self, hidden: bool): + def __init__(self, hidden): self.hidden = hidden class PerformanceTrackAggr(Message): __id__ = 56 - def __init__(self, timestamp_start, timestamp_end, min_fps, avg_fps, - max_fps, min_cpu, avg_cpu, max_cpu, - min_total_js_heap_size, avg_total_js_heap_size, - max_total_js_heap_size, min_used_js_heap_size, - avg_used_js_heap_size, max_used_js_heap_size - ): + def __init__(self, timestamp_start, timestamp_end, min_fps, avg_fps, max_fps, min_cpu, avg_cpu, max_cpu, min_total_js_heap_size, avg_total_js_heap_size, max_total_js_heap_size, min_used_js_heap_size, avg_used_js_heap_size, max_used_js_heap_size): self.timestamp_start = timestamp_start self.timestamp_end = timestamp_end self.min_fps = min_fps @@ -598,8 +600,7 @@ class PerformanceTrackAggr(Message): class LongTask(Message): __id__ = 59 - def __init__(self, timestamp, duration, context, container_type, container_src: str, - container_id: str, container_name: str): + def __init__(self, timestamp, duration, context, container_type, container_src, container_id, container_name): self.timestamp = timestamp self.duration = duration self.context = context @@ -609,20 +610,20 @@ class LongTask(Message): self.container_name = container_name -class SetNodeURLBasedAttribute(Message): +class SetNodeAttributeURLBased(Message): __id__ = 60 - def __init__(self, id, name: str, value: str, base_url: str): + def __init__(self, id, name, value, base_url): self.id = id self.name = name self.value = value self.base_url = base_url -class SetStyleData(Message): +class SetCSSDataURLBased(Message): __id__ = 61 - def __init__(self, id, data: str, base_url: str): + def __init__(self, id, data, base_url): self.id = id self.data = data self.base_url = base_url @@ -631,8 +632,7 @@ class SetStyleData(Message): class IssueEvent(Message): __id__ = 62 - def __init__(self, message_id, timestamp, type: str, context_string: str, - context: str, payload: str): + def __init__(self, message_id, timestamp, type, context_string, context, payload): self.message_id = message_id self.timestamp = timestamp self.type = type @@ -644,7 +644,7 @@ class IssueEvent(Message): class TechnicalInfo(Message): __id__ = 63 - def __init__(self, type: str, value: str): + def __init__(self, type, value): self.type = type self.value = value @@ -652,15 +652,11 @@ class TechnicalInfo(Message): class CustomIssue(Message): __id__ = 64 - def __init__(self, name: str, payload: str): + def __init__(self, name, payload): self.name = name self.payload = payload -class PageClose(Message): - __id__ = 65 - - class AssetCache(Message): __id__ = 66 @@ -681,7 +677,7 @@ class CSSInsertRuleURLBased(Message): class MouseClick(Message): __id__ = 69 - def __init__(self, id, hesitation_time, label: str, selector): + def __init__(self, id, hesitation_time, label, selector): self.id = id self.hesitation_time = hesitation_time self.label = label @@ -696,20 +692,19 @@ class CreateIFrameDocument(Message): self.id = id -class BatchMeta(Message): - __id__ = 80 +class IOSBatchMeta(Message): + __id__ = 107 - def __init__(self, page_no, first_index, timestamp): - self.page_no = page_no - self.first_index = first_index + def __init__(self, timestamp, length, first_index): self.timestamp = timestamp + self.length = length + self.first_index = first_index + class IOSSessionStart(Message): __id__ = 90 - def __init__(self, timestamp, project_id, tracker_version: str, - rev_id: str, user_uuid: str, user_os: str, user_os_version: str, - user_device: str, user_device_type: str, user_country: str): + def __init__(self, timestamp, project_id, tracker_version, rev_id, user_uuid, user_os, user_os_version, user_device, user_device_type, user_country): self.timestamp = timestamp self.project_id = project_id self.tracker_version = tracker_version @@ -732,7 +727,7 @@ class IOSSessionEnd(Message): class IOSMetadata(Message): __id__ = 92 - def __init__(self, timestamp, length, key: str, value: str): + def __init__(self, timestamp, length, key, value): self.timestamp = timestamp self.length = length self.key = key @@ -742,7 +737,7 @@ class IOSMetadata(Message): class IOSCustomEvent(Message): __id__ = 93 - def __init__(self, timestamp, length, name: str, payload: str): + def __init__(self, timestamp, length, name, payload): self.timestamp = timestamp self.length = length self.name = name @@ -752,7 +747,7 @@ class IOSCustomEvent(Message): class IOSUserID(Message): __id__ = 94 - def __init__(self, timestamp, length, value: str): + def __init__(self, timestamp, length, value): self.timestamp = timestamp self.length = length self.value = value @@ -761,7 +756,7 @@ class IOSUserID(Message): class IOSUserAnonymousID(Message): __id__ = 95 - def __init__(self, timestamp, length, value: str): + def __init__(self, timestamp, length, value): self.timestamp = timestamp self.length = length self.value = value @@ -782,7 +777,7 @@ class IOSScreenChanges(Message): class IOSCrash(Message): __id__ = 97 - def __init__(self, timestamp, length, name: str, reason: str, stacktrace): + def __init__(self, timestamp, length, name, reason, stacktrace): self.timestamp = timestamp self.length = length self.name = name @@ -803,7 +798,7 @@ class IOSScreenEnter(Message): class IOSScreenLeave(Message): __id__ = 99 - def __init__(self, timestamp, length, title: str, view_name: str): + def __init__(self, timestamp, length, title, view_name): self.timestamp = timestamp self.length = length self.title = title @@ -824,9 +819,10 @@ class IOSClickEvent(Message): class IOSInputEvent(Message): __id__ = 101 - def __init__(self, timestamp, length, value: str, value_masked: bool, label: str): + def __init__(self, timestamp, length, value, value_masked, label): self.timestamp = timestamp self.length = length + self.value = value self.value_masked = value_masked self.label = label @@ -834,7 +830,7 @@ class IOSInputEvent(Message): class IOSPerformanceEvent(Message): __id__ = 102 - def __init__(self, timestamp, length, name: str, value): + def __init__(self, timestamp, length, name, value): self.timestamp = timestamp self.length = length self.name = name @@ -844,7 +840,7 @@ class IOSPerformanceEvent(Message): class IOSLog(Message): __id__ = 103 - def __init__(self, timestamp, length, severity: str, content: str): + def __init__(self, timestamp, length, severity, content): self.timestamp = timestamp self.length = length self.severity = severity @@ -854,7 +850,7 @@ class IOSLog(Message): class IOSInternalError(Message): __id__ = 104 - def __init__(self, timestamp, length, content: str): + def __init__(self, timestamp, length, content): self.timestamp = timestamp self.length = length self.content = content @@ -863,7 +859,7 @@ class IOSInternalError(Message): class IOSNetworkCall(Message): __id__ = 105 - def __init__(self, timestamp, length, duration, headers, body, url, success: bool, method: str, status): + def __init__(self, timestamp, length, duration, headers, body, url, success, method, status): self.timestamp = timestamp self.length = length self.duration = duration @@ -875,23 +871,10 @@ class IOSNetworkCall(Message): self.status = status -class IOSBatchMeta(Message): - __id__ = 107 - - def __init__(self, timestamp, length, first_index): - self.timestamp = timestamp - self.length = length - self.first_index = first_index - - class IOSPerformanceAggregated(Message): __id__ = 110 - def __init__(self, timestamp_start, timestamp_end, min_fps, avg_fps, - max_fps, min_cpu, avg_cpu, max_cpu, - min_memory, avg_memory, max_memory, - min_battery, avg_battery, max_battery - ): + def __init__(self, timestamp_start, timestamp_end, min_fps, avg_fps, max_fps, min_cpu, avg_cpu, max_cpu, min_memory, avg_memory, max_memory, min_battery, avg_battery, max_battery): self.timestamp_start = timestamp_start self.timestamp_end = timestamp_end self.min_fps = min_fps @@ -911,9 +894,11 @@ class IOSPerformanceAggregated(Message): class IOSIssueEvent(Message): __id__ = 111 - def __init__(self, timestamp, type: str, context_string: str, context: str, payload: str): + def __init__(self, timestamp, type, context_string, context, payload): self.timestamp = timestamp self.type = type self.context_string = context_string self.context = context self.payload = payload + + diff --git a/ee/connectors/msgcodec/msgcodec.py b/ee/connectors/msgcodec/msgcodec.py new file mode 100644 index 000000000..3cd6a846a --- /dev/null +++ b/ee/connectors/msgcodec/msgcodec.py @@ -0,0 +1,739 @@ +# Auto-generated, do not edit + +from msgcodec.codec import Codec +from msgcodec.messages import * +import io + +class MessageCodec(Codec): + + def read_message_id(self, reader: io.BytesIO) -> int: + """ + Read and return the first byte where the message id is encoded + """ + id_ = self.read_uint(reader) + return id_ + + def encode(self, m: Message) -> bytes: + ... + + def decode(self, b: bytes) -> Message: + reader = io.BytesIO(b) + message_id = self.read_message_id(reader) + + if message_id == 80: + return BatchMeta( + page_no=self.read_uint(reader), + first_index=self.read_uint(reader), + timestamp=self.read_int(reader) + ) + + if message_id == 81: + return BatchMetadata( + version=self.read_uint(reader), + page_no=self.read_uint(reader), + first_index=self.read_uint(reader), + timestamp=self.read_int(reader), + location=self.read_string(reader) + ) + + if message_id == 82: + return PartitionedMessage( + part_no=self.read_uint(reader), + part_total=self.read_uint(reader) + ) + + if message_id == 0: + return Timestamp( + timestamp=self.read_uint(reader) + ) + + if message_id == 1: + return SessionStart( + timestamp=self.read_uint(reader), + project_id=self.read_uint(reader), + tracker_version=self.read_string(reader), + rev_id=self.read_string(reader), + user_uuid=self.read_string(reader), + user_agent=self.read_string(reader), + user_os=self.read_string(reader), + user_os_version=self.read_string(reader), + user_browser=self.read_string(reader), + user_browser_version=self.read_string(reader), + user_device=self.read_string(reader), + user_device_type=self.read_string(reader), + user_device_memory_size=self.read_uint(reader), + user_device_heap_size=self.read_uint(reader), + user_country=self.read_string(reader), + user_id=self.read_string(reader) + ) + + if message_id == 2: + return SessionDisconnect( + timestamp=self.read_uint(reader) + ) + + if message_id == 3: + return SessionEnd( + timestamp=self.read_uint(reader) + ) + + if message_id == 4: + return SetPageLocation( + url=self.read_string(reader), + referrer=self.read_string(reader), + navigation_start=self.read_uint(reader) + ) + + if message_id == 5: + return SetViewportSize( + width=self.read_uint(reader), + height=self.read_uint(reader) + ) + + if message_id == 6: + return SetViewportScroll( + x=self.read_int(reader), + y=self.read_int(reader) + ) + + if message_id == 7: + return CreateDocument( + + ) + + if message_id == 8: + return CreateElementNode( + id=self.read_uint(reader), + parent_id=self.read_uint(reader), + index=self.read_uint(reader), + tag=self.read_string(reader), + svg=self.read_boolean(reader) + ) + + if message_id == 9: + return CreateTextNode( + id=self.read_uint(reader), + parent_id=self.read_uint(reader), + index=self.read_uint(reader) + ) + + if message_id == 10: + return MoveNode( + id=self.read_uint(reader), + parent_id=self.read_uint(reader), + index=self.read_uint(reader) + ) + + if message_id == 11: + return RemoveNode( + id=self.read_uint(reader) + ) + + if message_id == 12: + return SetNodeAttribute( + id=self.read_uint(reader), + name=self.read_string(reader), + value=self.read_string(reader) + ) + + if message_id == 13: + return RemoveNodeAttribute( + id=self.read_uint(reader), + name=self.read_string(reader) + ) + + if message_id == 14: + return SetNodeData( + id=self.read_uint(reader), + data=self.read_string(reader) + ) + + if message_id == 15: + return SetCSSData( + id=self.read_uint(reader), + data=self.read_string(reader) + ) + + if message_id == 16: + return SetNodeScroll( + id=self.read_uint(reader), + x=self.read_int(reader), + y=self.read_int(reader) + ) + + if message_id == 17: + return SetInputTarget( + id=self.read_uint(reader), + label=self.read_string(reader) + ) + + if message_id == 18: + return SetInputValue( + id=self.read_uint(reader), + value=self.read_string(reader), + mask=self.read_int(reader) + ) + + if message_id == 19: + return SetInputChecked( + id=self.read_uint(reader), + checked=self.read_boolean(reader) + ) + + if message_id == 20: + return MouseMove( + x=self.read_uint(reader), + y=self.read_uint(reader) + ) + + if message_id == 21: + return MouseClickDepricated( + id=self.read_uint(reader), + hesitation_time=self.read_uint(reader), + label=self.read_string(reader) + ) + + if message_id == 22: + return ConsoleLog( + level=self.read_string(reader), + value=self.read_string(reader) + ) + + if message_id == 23: + return PageLoadTiming( + request_start=self.read_uint(reader), + response_start=self.read_uint(reader), + response_end=self.read_uint(reader), + dom_content_loaded_event_start=self.read_uint(reader), + dom_content_loaded_event_end=self.read_uint(reader), + load_event_start=self.read_uint(reader), + load_event_end=self.read_uint(reader), + first_paint=self.read_uint(reader), + first_contentful_paint=self.read_uint(reader) + ) + + if message_id == 24: + return PageRenderTiming( + speed_index=self.read_uint(reader), + visually_complete=self.read_uint(reader), + time_to_interactive=self.read_uint(reader) + ) + + if message_id == 25: + return JSException( + name=self.read_string(reader), + message=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 26: + return IntegrationEvent( + timestamp=self.read_uint(reader), + source=self.read_string(reader), + name=self.read_string(reader), + message=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 27: + return RawCustomEvent( + name=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 28: + return UserID( + id=self.read_string(reader) + ) + + if message_id == 29: + return UserAnonymousID( + id=self.read_string(reader) + ) + + if message_id == 30: + return Metadata( + key=self.read_string(reader), + value=self.read_string(reader) + ) + + if message_id == 31: + return PageEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + url=self.read_string(reader), + referrer=self.read_string(reader), + loaded=self.read_boolean(reader), + request_start=self.read_uint(reader), + response_start=self.read_uint(reader), + response_end=self.read_uint(reader), + dom_content_loaded_event_start=self.read_uint(reader), + dom_content_loaded_event_end=self.read_uint(reader), + load_event_start=self.read_uint(reader), + load_event_end=self.read_uint(reader), + first_paint=self.read_uint(reader), + first_contentful_paint=self.read_uint(reader), + speed_index=self.read_uint(reader), + visually_complete=self.read_uint(reader), + time_to_interactive=self.read_uint(reader) + ) + + if message_id == 32: + return InputEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + value=self.read_string(reader), + value_masked=self.read_boolean(reader), + label=self.read_string(reader) + ) + + if message_id == 33: + return ClickEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + hesitation_time=self.read_uint(reader), + label=self.read_string(reader), + selector=self.read_string(reader) + ) + + if message_id == 34: + return ErrorEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + source=self.read_string(reader), + name=self.read_string(reader), + message=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 35: + return ResourceEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + duration=self.read_uint(reader), + ttfb=self.read_uint(reader), + header_size=self.read_uint(reader), + encoded_body_size=self.read_uint(reader), + decoded_body_size=self.read_uint(reader), + url=self.read_string(reader), + type=self.read_string(reader), + success=self.read_boolean(reader), + method=self.read_string(reader), + status=self.read_uint(reader) + ) + + if message_id == 36: + return CustomEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + name=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 37: + return CSSInsertRule( + id=self.read_uint(reader), + rule=self.read_string(reader), + index=self.read_uint(reader) + ) + + if message_id == 38: + return CSSDeleteRule( + id=self.read_uint(reader), + index=self.read_uint(reader) + ) + + if message_id == 39: + return Fetch( + method=self.read_string(reader), + url=self.read_string(reader), + request=self.read_string(reader), + response=self.read_string(reader), + status=self.read_uint(reader), + timestamp=self.read_uint(reader), + duration=self.read_uint(reader) + ) + + if message_id == 40: + return Profiler( + name=self.read_string(reader), + duration=self.read_uint(reader), + args=self.read_string(reader), + result=self.read_string(reader) + ) + + if message_id == 41: + return OTable( + key=self.read_string(reader), + value=self.read_string(reader) + ) + + if message_id == 42: + return StateAction( + type=self.read_string(reader) + ) + + if message_id == 43: + return StateActionEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + type=self.read_string(reader) + ) + + if message_id == 44: + return Redux( + action=self.read_string(reader), + state=self.read_string(reader), + duration=self.read_uint(reader) + ) + + if message_id == 45: + return Vuex( + mutation=self.read_string(reader), + state=self.read_string(reader) + ) + + if message_id == 46: + return MobX( + type=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 47: + return NgRx( + action=self.read_string(reader), + state=self.read_string(reader), + duration=self.read_uint(reader) + ) + + if message_id == 48: + return GraphQL( + operation_kind=self.read_string(reader), + operation_name=self.read_string(reader), + variables=self.read_string(reader), + response=self.read_string(reader) + ) + + if message_id == 49: + return PerformanceTrack( + frames=self.read_int(reader), + ticks=self.read_int(reader), + total_js_heap_size=self.read_uint(reader), + used_js_heap_size=self.read_uint(reader) + ) + + if message_id == 50: + return GraphQLEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + operation_kind=self.read_string(reader), + operation_name=self.read_string(reader), + variables=self.read_string(reader), + response=self.read_string(reader) + ) + + if message_id == 51: + return FetchEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + method=self.read_string(reader), + url=self.read_string(reader), + request=self.read_string(reader), + response=self.read_string(reader), + status=self.read_uint(reader), + duration=self.read_uint(reader) + ) + + if message_id == 52: + return DOMDrop( + timestamp=self.read_uint(reader) + ) + + if message_id == 53: + return ResourceTiming( + timestamp=self.read_uint(reader), + duration=self.read_uint(reader), + ttfb=self.read_uint(reader), + header_size=self.read_uint(reader), + encoded_body_size=self.read_uint(reader), + decoded_body_size=self.read_uint(reader), + url=self.read_string(reader), + initiator=self.read_string(reader) + ) + + if message_id == 54: + return ConnectionInformation( + downlink=self.read_uint(reader), + type=self.read_string(reader) + ) + + if message_id == 55: + return SetPageVisibility( + hidden=self.read_boolean(reader) + ) + + if message_id == 56: + return PerformanceTrackAggr( + timestamp_start=self.read_uint(reader), + timestamp_end=self.read_uint(reader), + min_fps=self.read_uint(reader), + avg_fps=self.read_uint(reader), + max_fps=self.read_uint(reader), + min_cpu=self.read_uint(reader), + avg_cpu=self.read_uint(reader), + max_cpu=self.read_uint(reader), + min_total_js_heap_size=self.read_uint(reader), + avg_total_js_heap_size=self.read_uint(reader), + max_total_js_heap_size=self.read_uint(reader), + min_used_js_heap_size=self.read_uint(reader), + avg_used_js_heap_size=self.read_uint(reader), + max_used_js_heap_size=self.read_uint(reader) + ) + + if message_id == 59: + return LongTask( + timestamp=self.read_uint(reader), + duration=self.read_uint(reader), + context=self.read_uint(reader), + container_type=self.read_uint(reader), + container_src=self.read_string(reader), + container_id=self.read_string(reader), + container_name=self.read_string(reader) + ) + + if message_id == 60: + return SetNodeAttributeURLBased( + id=self.read_uint(reader), + name=self.read_string(reader), + value=self.read_string(reader), + base_url=self.read_string(reader) + ) + + if message_id == 61: + return SetCSSDataURLBased( + id=self.read_uint(reader), + data=self.read_string(reader), + base_url=self.read_string(reader) + ) + + if message_id == 62: + return IssueEvent( + message_id=self.read_uint(reader), + timestamp=self.read_uint(reader), + type=self.read_string(reader), + context_string=self.read_string(reader), + context=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 63: + return TechnicalInfo( + type=self.read_string(reader), + value=self.read_string(reader) + ) + + if message_id == 64: + return CustomIssue( + name=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 66: + return AssetCache( + url=self.read_string(reader) + ) + + if message_id == 67: + return CSSInsertRuleURLBased( + id=self.read_uint(reader), + rule=self.read_string(reader), + index=self.read_uint(reader), + base_url=self.read_string(reader) + ) + + if message_id == 69: + return MouseClick( + id=self.read_uint(reader), + hesitation_time=self.read_uint(reader), + label=self.read_string(reader), + selector=self.read_string(reader) + ) + + if message_id == 70: + return CreateIFrameDocument( + frame_id=self.read_uint(reader), + id=self.read_uint(reader) + ) + + if message_id == 107: + return IOSBatchMeta( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + first_index=self.read_uint(reader) + ) + + if message_id == 90: + return IOSSessionStart( + timestamp=self.read_uint(reader), + project_id=self.read_uint(reader), + tracker_version=self.read_string(reader), + rev_id=self.read_string(reader), + user_uuid=self.read_string(reader), + user_os=self.read_string(reader), + user_os_version=self.read_string(reader), + user_device=self.read_string(reader), + user_device_type=self.read_string(reader), + user_country=self.read_string(reader) + ) + + if message_id == 91: + return IOSSessionEnd( + timestamp=self.read_uint(reader) + ) + + if message_id == 92: + return IOSMetadata( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + key=self.read_string(reader), + value=self.read_string(reader) + ) + + if message_id == 93: + return IOSCustomEvent( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + name=self.read_string(reader), + payload=self.read_string(reader) + ) + + if message_id == 94: + return IOSUserID( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + value=self.read_string(reader) + ) + + if message_id == 95: + return IOSUserAnonymousID( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + value=self.read_string(reader) + ) + + if message_id == 96: + return IOSScreenChanges( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + x=self.read_uint(reader), + y=self.read_uint(reader), + width=self.read_uint(reader), + height=self.read_uint(reader) + ) + + if message_id == 97: + return IOSCrash( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + name=self.read_string(reader), + reason=self.read_string(reader), + stacktrace=self.read_string(reader) + ) + + if message_id == 98: + return IOSScreenEnter( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + title=self.read_string(reader), + view_name=self.read_string(reader) + ) + + if message_id == 99: + return IOSScreenLeave( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + title=self.read_string(reader), + view_name=self.read_string(reader) + ) + + if message_id == 100: + return IOSClickEvent( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + label=self.read_string(reader), + x=self.read_uint(reader), + y=self.read_uint(reader) + ) + + if message_id == 101: + return IOSInputEvent( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + value=self.read_string(reader), + value_masked=self.read_boolean(reader), + label=self.read_string(reader) + ) + + if message_id == 102: + return IOSPerformanceEvent( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + name=self.read_string(reader), + value=self.read_uint(reader) + ) + + if message_id == 103: + return IOSLog( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + severity=self.read_string(reader), + content=self.read_string(reader) + ) + + if message_id == 104: + return IOSInternalError( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + content=self.read_string(reader) + ) + + if message_id == 105: + return IOSNetworkCall( + timestamp=self.read_uint(reader), + length=self.read_uint(reader), + duration=self.read_uint(reader), + headers=self.read_string(reader), + body=self.read_string(reader), + url=self.read_string(reader), + success=self.read_boolean(reader), + method=self.read_string(reader), + status=self.read_uint(reader) + ) + + if message_id == 110: + return IOSPerformanceAggregated( + timestamp_start=self.read_uint(reader), + timestamp_end=self.read_uint(reader), + min_fps=self.read_uint(reader), + avg_fps=self.read_uint(reader), + max_fps=self.read_uint(reader), + min_cpu=self.read_uint(reader), + avg_cpu=self.read_uint(reader), + max_cpu=self.read_uint(reader), + min_memory=self.read_uint(reader), + avg_memory=self.read_uint(reader), + max_memory=self.read_uint(reader), + min_battery=self.read_uint(reader), + avg_battery=self.read_uint(reader), + max_battery=self.read_uint(reader) + ) + + if message_id == 111: + return IOSIssueEvent( + timestamp=self.read_uint(reader), + type=self.read_string(reader), + context_string=self.read_string(reader), + context=self.read_string(reader), + payload=self.read_string(reader) + ) + From 846c167db3c390ecda3dee79662484141fd51a0d Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Thu, 25 Aug 2022 12:10:45 +0200 Subject: [PATCH 32/33] added BatchMetadata and handle the new message format within it --- ee/connectors/handler.py | 10 ++++++++++ ee/connectors/msgcodec/codec.py | 2 ++ ee/connectors/msgcodec/msgcodec.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/ee/connectors/handler.py b/ee/connectors/handler.py index 6f6316545..1442c9148 100644 --- a/ee/connectors/handler.py +++ b/ee/connectors/handler.py @@ -146,6 +146,16 @@ def handle_session(n: Session, message: Message) -> Optional[Session]: n.batchmeta_timestamp = message.timestamp return n + if isinstance(message, BatchMetadata): + n.batchmeta_page_no = message.page_no + n.batchmeta_first_index = message.first_index + n.batchmeta_timestamp = message.timestamp + return n + + if isinstance(message, PartitionedMessage): + n.part_no = message.part_no + n.part_total = message.part_total + # if isinstance(message, IOSBatchMeta): # n.iosbatchmeta_page_no = message.page_no # n.iosbatchmeta_first_index = message.first_index diff --git a/ee/connectors/msgcodec/codec.py b/ee/connectors/msgcodec/codec.py index 5aeb0e4ed..577f02f48 100644 --- a/ee/connectors/msgcodec/codec.py +++ b/ee/connectors/msgcodec/codec.py @@ -21,6 +21,8 @@ class Codec: i = 0 # n of byte (max 9 for uint64) while True: b = reader.read(1) + if len(b) == 0: + raise IndexError('bytes out of range') num = int.from_bytes(b, "big", signed=False) # print(i, x) diff --git a/ee/connectors/msgcodec/msgcodec.py b/ee/connectors/msgcodec/msgcodec.py index 3cd6a846a..03515316e 100644 --- a/ee/connectors/msgcodec/msgcodec.py +++ b/ee/connectors/msgcodec/msgcodec.py @@ -18,7 +18,36 @@ class MessageCodec(Codec): def decode(self, b: bytes) -> Message: reader = io.BytesIO(b) + return self.read_head_message(reader) + + def decode_detailed(self, b: bytes): + reader = io.BytesIO(b) + messages_list = list() + messages_list.append(self.handler(reader, 0)) + if isinstance(messages_list[0], BatchMeta): + mode = 0 + elif isinstance(messages_list[0], BatchMetadata): + mode = 1 + else: + return messages_list + while True: + try: + messages_list.append(self.handler(reader, mode)) + except IndexError: + break + return messages_list + + def handler(self, reader: io.BytesIO, mode=0): message_id = self.read_message_id(reader) + if mode == 1: + reader.read(3) + return self.read_head_message(reader, message_id) + elif mode == 0: + return self.read_head_message(reader, message_id) + else: + raise IOError() + + def read_head_message(self, reader: io.BytesIO, message_id: int): if message_id == 80: return BatchMeta( From 7fc8fab4d7d40248f7a22ca076ea9d0d12179c6e Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Thu, 25 Aug 2022 14:34:33 +0200 Subject: [PATCH 33/33] Minor issues solved --- ee/connectors/handler.py | 10 ++++++++++ ee/connectors/msgcodec/msgcodec.py | 27 ++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/ee/connectors/handler.py b/ee/connectors/handler.py index 1442c9148..02f393091 100644 --- a/ee/connectors/handler.py +++ b/ee/connectors/handler.py @@ -401,6 +401,16 @@ def handle_message(message: Message) -> Optional[DetailedEvent]: n.batchmeta_timestamp = message.timestamp return n + if isinstance(message, BatchMetadata): + n.batchmeta_page_no = message.page_no + n.batchmeta_first_index = message.first_index + n.batchmeta_timestamp = message.timestamp + return n + + if isinstance(message, PartitionedMessage): + n.part_no = message.part_no + n.part_total = message.part_total + if isinstance(message, PerformanceTrack): n.performancetrack_frames = message.frames n.performancetrack_ticks = message.ticks diff --git a/ee/connectors/msgcodec/msgcodec.py b/ee/connectors/msgcodec/msgcodec.py index 03515316e..b390643ce 100644 --- a/ee/connectors/msgcodec/msgcodec.py +++ b/ee/connectors/msgcodec/msgcodec.py @@ -20,13 +20,37 @@ class MessageCodec(Codec): reader = io.BytesIO(b) return self.read_head_message(reader) + @staticmethod + def check_message_id(b: bytes) -> int: + """ + todo: make it static and without reader. It's just the first byte + Read and return the first byte where the message id is encoded + """ + reader = io.BytesIO(b) + id_ = Codec.read_uint(reader) + + return id_ + + @staticmethod + def decode_key(b) -> int: + """ + Decode the message key (encoded with little endian) + """ + try: + decoded = int.from_bytes(b, "little", signed=False) + except Exception as e: + raise UnicodeDecodeError(f"Error while decoding message key (SessionID) from {b}\n{e}") + return decoded + def decode_detailed(self, b: bytes): reader = io.BytesIO(b) messages_list = list() messages_list.append(self.handler(reader, 0)) if isinstance(messages_list[0], BatchMeta): + # Old BatchMeta mode = 0 elif isinstance(messages_list[0], BatchMetadata): + # New BatchMeta mode = 1 else: return messages_list @@ -40,15 +64,16 @@ class MessageCodec(Codec): def handler(self, reader: io.BytesIO, mode=0): message_id = self.read_message_id(reader) if mode == 1: + # We skip the three bytes representing the length of message. It can be used to skip unwanted messages reader.read(3) return self.read_head_message(reader, message_id) elif mode == 0: + # Old format with no bytes for message length return self.read_head_message(reader, message_id) else: raise IOError() def read_head_message(self, reader: io.BytesIO, message_id: int): - if message_id == 80: return BatchMeta( page_no=self.read_uint(reader),