From c38255308a7564f6ab2f796a797c24b693592d76 Mon Sep 17 00:00:00 2001 From: mauricio garcia suarez Date: Fri, 29 Jul 2022 12:13:47 +0200 Subject: [PATCH] Added new Indexes, updated source and python test of quickwit --- README.md | 2 +- index-config.yaml => index-config-fetch.yaml | 17 +++-- index-config-graphql.yaml | 30 +++++++++ index-config-pageevent.yaml | 68 ++++++++++++++++++++ kafka_sample.py | 39 +++++++++-- sources.yaml | 29 +++++++++ 6 files changed, 170 insertions(+), 15 deletions(-) rename index-config.yaml => index-config-fetch.yaml (74%) create mode 100644 index-config-graphql.yaml create mode 100644 index-config-pageevent.yaml create mode 100644 sources.yaml diff --git a/README.md b/README.md index 7b566ed13..f5110c7ee 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,6 @@ To deploy the indexer, search and UI services run the command: bash run_quickwit.sh ``` UI server will start at localhost:7280. The api can also be called through the url http://127.0.0.1:7280/api/v1/quickwit-kafka/search?query={your_query} for example -```angular2html +```bash curl "http://127.0.0.1:7280/api/v1/quickwit-kafka/search?query=body:error" ``` \ No newline at end of file diff --git a/index-config.yaml b/index-config-fetch.yaml similarity index 74% rename from index-config.yaml rename to index-config-fetch.yaml index 8e68eb73d..1a7577450 100644 --- a/index-config.yaml +++ b/index-config-fetch.yaml @@ -4,9 +4,10 @@ version: 0 -index_id: quickwit-kafka +index_id: fetch doc_mapping: + mode: strict field_mappings: - name: method type: text @@ -25,17 +26,15 @@ doc_mapping: tokenizer: default record: position - name: status - type: text - tokenizer: default + type: i64 + fast: true record: position - name: timestamp - type: text - tokenizer: default - record: position + type: i64 + fast: true - name: duration - type: text - tokenizer: default - record: position + type: i64 + fast: true search_settings: default_search_fields: [url, request, response] diff --git 
a/index-config-graphql.yaml b/index-config-graphql.yaml new file mode 100644 index 000000000..bac1d8406 --- /dev/null +++ b/index-config-graphql.yaml @@ -0,0 +1,30 @@ +# +# Index config file for the graphql index. +# + +version: 0 + +index_id: graphql + +doc_mapping: + mode: strict + field_mappings: + - name: operation_kind + type: text + tokenizer: default + record: position + - name: operation_name + type: text + tokenizer: default + record: position + - name: variables + type: text + tokenizer: default + record: position + - name: response + type: text + tokenizer: default + record: position + +search_settings: + default_search_fields: [operation_kind, operation_name, variables] diff --git a/index-config-pageevent.yaml b/index-config-pageevent.yaml new file mode 100644 index 000000000..90d8d152f --- /dev/null +++ b/index-config-pageevent.yaml @@ -0,0 +1,68 @@ +# +# Index config file for the pageevent index. +# + +version: 0 + +index_id: pageevent + +doc_mapping: + mode: strict + field_mappings: + - name: message_id + type: i64 + fast: true + record: position + - name: timestamp + type: i64 + fast: true + - name: url + type: text + tokenizer: default + record: position + - name: referrer + type: text + tokenizer: default + record: position + - name: loaded + type: i64 + fast: true + - name: request_start + type: i64 + fast: true + - name: response_start + type: i64 + fast: true + - name: response_end + type: i64 + fast: true + - name: dom_content_loaded_event_start + type: i64 + fast: true + - name: dom_content_loaded_event_end + type: i64 + fast: true + - name: load_event_start + type: i64 + fast: true + - name: load_event_end + type: i64 + fast: true + - name: first_paint + type: i64 + fast: true + - name: first_contentful_paint + type: i64 + fast: true + - name: speed_index + type: i64 + fast: true + - name: visually_complete + type: i64 + fast: true + - name: time_to_interactive + type: i64 + fast: true + +search_settings: + default_search_fields: [url, 
referrer, visually_complete] diff --git a/kafka_sample.py b/kafka_sample.py index 0ebce304c..47e9f8001 100644 --- a/kafka_sample.py +++ b/kafka_sample.py @@ -5,14 +5,13 @@ from datetime import datetime from collections import defaultdict from msgcodec.codec import MessageCodec -from msgcodec.messages import SessionEnd, Fetch, FetchEvent, PageEvent, SetCSSData, SetStyleData +from msgcodec.messages import Fetch, FetchEvent, PageEvent, GraphQL import json import getopt, sys n = 0 -fetch_keys = ['method', 'url', 'request', 'response', 'status', 'timestamp', 'duration'] -def transform(data): +def transform_fetch(data): global n n += 1 return { @@ -20,6 +19,28 @@ def transform(data): 'status': data.status, 'timestamp': data.timestamp, 'duration': data.duration } +def transform_graphql(data): + global n + n += 1 + return { + 'operation_kind': data.operation_kind, 'operation_name': data.operation_name, + 'variables': data.variables, 'response': data.response + } + +def transform_pageevent(data): + global n + n += 1 + return {'message_id': data.message_id, 'timestamp': data.timestamp, 'url': data.url, + 'referrer': data.referrer, 'loaded': data.loaded, 'request_start': data.request_start, + 'response_start': data.response_start, 'response_end': data.response_end, + 'dom_content_loaded_event_start': data.dom_content_loaded_event_start, + 'dom_content_loaded_event_end': data.dom_content_loaded_event_end, + 'load_event_start': data.load_event_start, 'load_event_end': data.load_event_end, + 'first_paint': data.first_paint, 'first_contentful_paint': data.first_contentful_paint, + 'speed_index': data.speed_index, 'visually_complete': data.visually_complete, + 'time_to_interactive': data.time_to_interactive + } + def create_producer(): producer = KafkaProducer(#security_protocol="SSL", bootstrap_servers=os.environ['KAFKA_SERVER_2'], @@ -60,8 +81,16 @@ def consumer_producer_end(): for message in messages: send = False if isinstance(message, Fetch) or isinstance(message, 
FetchEvent): - producer.send('quickwit-kafka', value=transform(message)) - print(f'added message {n}') + producer.send('quickwit-kafka', value=transform_fetch(message)) + print(f'added message {n} type Fetch') + sleep(5) + if isinstance(message, GraphQL): + producer.send('quickwit-kafka', value=transform_graphql(message)) + print(f'added message {n} type GraphQL') + sleep(5) + if isinstance(message, PageEvent): + producer.send('quickwit-kafka', value=transform_pageevent(message)) + print(f'added message {n} type PageEvent') sleep(5) diff --git a/sources.yaml b/sources.yaml new file mode 100644 index 000000000..7dad76619 --- /dev/null +++ b/sources.yaml @@ -0,0 +1,29 @@ +# +# Source config file. +# + +sources: + - fetch: fetch-kafka + source_type: kafka + params: + topic: quickwit-kafka + client_params: + bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + group.id: fetch-consumer + # security.protocol: SSL + - graphql: graphql-kafka + source_type: kafka + params: + topic: quickwit-kafka + client_params: + bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + group.id: graphql-consumer + # security.protocol: SSL + - pageevent: pageevent-kafka + source_type: kafka + params: + topic: quickwit-kafka + client_params: + bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092 + group.id: pageevent-consumer + # security.protocol: SSL