Added new Indexes, updated source and python test of quickwit

This commit is contained in:
mauricio garcia suarez 2022-07-29 12:13:47 +02:00
parent 7b77a0e90e
commit c38255308a
6 changed files with 170 additions and 15 deletions

View file

@ -34,6 +34,6 @@ To deploy the indexer, search and UI services run the command:
bash run_quickwit.sh
```
The UI server will start at localhost:7280. The API can also be called directly through the URL http://127.0.0.1:7280/api/v1/quickwit-kafka/search?query={your_query}, for example:
```angular2html
```bash
curl "http://127.0.0.1:7280/api/v1/quickwit-kafka/search?query=body:error"
```

View file

@ -4,9 +4,10 @@
version: 0
index_id: quickwit-kafka
index_id: fetch
doc_mapping:
mode: strict
field_mappings:
- name: method
type: text
@ -25,17 +26,15 @@ doc_mapping:
tokenizer: default
record: position
- name: status
type: text
tokenizer: default
type: i64
fast: true
record: position
- name: timestamp
type: text
tokenizer: default
record: position
type: i64
fast: true
- name: duration
type: text
tokenizer: default
record: position
type: i64
fast: true
search_settings:
default_search_fields: [url, request, response]

30
index-config-graphql.yaml Normal file
View file

@ -0,0 +1,30 @@
#
# Index config file for the graphql index.
#
# Every field is a tokenized text field that records term positions.
#
version: 0
index_id: graphql

doc_mapping:
  mode: strict
  field_mappings:
    # Kind of the GraphQL operation.
    - name: operation_kind
      type: text
      tokenizer: default
      record: position
    # Name of the GraphQL operation.
    - name: operation_name
      type: text
      tokenizer: default
      record: position
    - name: variables
      type: text
      tokenizer: default
      record: position
    # Not part of the default search fields below; it has to be named
    # explicitly in a query.
    - name: response
      type: text
      tokenizer: default
      record: position

search_settings:
  # Fields searched when a query does not name a field.
  default_search_fields:
    - operation_kind
    - operation_name
    - variables

View file

@ -0,0 +1,68 @@
#
# Index config file for the pageevent index.
#
# `url` and `referrer` are tokenized text fields; every other field is an
# i64 fast field.
#
version: 0
index_id: pageevent

doc_mapping:
  mode: strict
  field_mappings:
    # `record` is a text-field option, so it is not set on this i64 field.
    - name: message_id
      type: i64
      fast: true
    - name: timestamp
      type: i64
      fast: true
    - name: url
      type: text
      tokenizer: default
      record: position
    - name: referrer
      type: text
      tokenizer: default
      record: position
    - name: loaded
      type: i64
      fast: true
    - name: request_start
      type: i64
      fast: true
    - name: response_start
      type: i64
      fast: true
    - name: response_end
      type: i64
      fast: true
    - name: dom_content_loaded_event_start
      type: i64
      fast: true
    - name: dom_content_loaded_event_end
      type: i64
      fast: true
    - name: load_event_start
      type: i64
      fast: true
    - name: load_event_end
      type: i64
      fast: true
    - name: first_paint
      type: i64
      fast: true
    - name: first_contentful_paint
      type: i64
      fast: true
    - name: speed_index
      type: i64
      fast: true
    - name: visually_complete
      type: i64
      fast: true
    - name: time_to_interactive
      type: i64
      fast: true

search_settings:
  # Fields searched when a query does not name a field.
  # NOTE(review): visually_complete is an i64 field; confirm that free-text
  # queries behave as intended with a numeric default search field.
  default_search_fields: [url, referrer, visually_complete]

View file

@ -5,14 +5,13 @@ from datetime import datetime
from collections import defaultdict
from msgcodec.codec import MessageCodec
from msgcodec.messages import SessionEnd, Fetch, FetchEvent, PageEvent, SetCSSData, SetStyleData
from msgcodec.messages import Fetch, FetchEvent, PageEvent, GraphQL
import json
import getopt, sys
n = 0
fetch_keys = ['method', 'url', 'request', 'response', 'status', 'timestamp', 'duration']
def transform(data):
def transform_fetch(data):
global n
n += 1
return {
@ -20,6 +19,28 @@ def transform(data):
'status': data.status, 'timestamp': data.timestamp, 'duration': data.duration
}
def transform_graphql(data):
    """Build the dict that represents a GraphQL message.

    Increments the module-level counter ``n`` once per call.

    Args:
        data: object exposing ``operation_kind``, ``operation_name``,
            ``variables`` and ``response`` attributes.

    Returns:
        dict mapping each of those four attribute names to its value on
        ``data``.
    """
    global n
    n = n + 1
    field_names = ('operation_kind', 'operation_name', 'variables', 'response')
    return {field_name: getattr(data, field_name) for field_name in field_names}
def transform_pageevent(data):
    """Build the dict that represents a PageEvent message.

    Increments the module-level counter ``n`` once per call.

    Args:
        data: object exposing the page-event attributes listed in
            ``field_names`` below.

    Returns:
        dict mapping every name in ``field_names`` to the value of the
        attribute of the same name on ``data``.
    """
    global n
    n += 1
    # Each key is derived from the attribute it is read from, so a key can
    # neither be misspelled independently of its attribute (previously
    # 'massage_id') nor be filled from a different attribute (previously
    # 'url' was read from data.timestamp).
    field_names = (
        'message_id', 'timestamp', 'url', 'referrer', 'loaded',
        'request_start', 'response_start', 'response_end',
        'dom_content_loaded_event_start', 'dom_content_loaded_event_end',
        'load_event_start', 'load_event_end',
        'first_paint', 'first_contentful_paint',
        'speed_index', 'visually_complete', 'time_to_interactive',
    )
    return {name: getattr(data, name) for name in field_names}
def create_producer():
producer = KafkaProducer(#security_protocol="SSL",
bootstrap_servers=os.environ['KAFKA_SERVER_2'],
@ -60,8 +81,16 @@ def consumer_producer_end():
for message in messages:
send = False
if isinstance(message, Fetch) or isinstance(message, FetchEvent):
producer.send('quickwit-kafka', value=transform(message))
print(f'added message {n}')
producer.send('quickwit-kafka', value=transform_fetch(message))
print(f'added message {n} type Fetch')
sleep(5)
if isinstance(message, GraphQL):
producer.send('quickwit-kafka', value=transform_graphql(message))
print(f'added message {n} type GraphQL')
sleep(5)
if isinstance(message, PageEvent):
producer.send('quickwit-kafka', value=transform_pageevent(message))
print(f'added message {n} type PageEvent')
sleep(5)

29
sources.yaml Normal file
View file

@ -0,0 +1,29 @@
#
# Source config file.
#
# One Kafka source per entry. All entries read the same topic from the same
# broker, each with its own consumer group.
#
# NOTE(review): Quickwit's own source config names a source with a
# `source_id` key; confirm that whatever consumes this file expects the
# `<name>: <source id>` form used here.
#
sources:
  - fetch: fetch-kafka
    source_type: kafka
    params:
      topic: quickwit-kafka
      client_params:
        bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092
        group.id: fetch-consumer
        # security.protocol: SSL
  - graphql: graphql-kafka
    source_type: kafka
    params:
      topic: quickwit-kafka
      client_params:
        bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092
        group.id: graphql-consumer
        # security.protocol: SSL
  # This entry was keyed `graphql` like the previous one; it configures the
  # pageevent consumer, so it is keyed `pageevent`.
  # NOTE(review): the source id value is left unchanged in case it is
  # referenced elsewhere; consider renaming it to `pageevent-kafka`.
  - pageevent: graphql-pageevent
    source_type: kafka
    params:
      topic: quickwit-kafka
      client_params:
        bootstrap.servers: kafka-1.kafka-headless.db.svc.cluster.local:9092
        group.id: pageevent-consumer
        # security.protocol: SSL