fix(quickwit): update modules and fix S3 connection (#1524)

This commit is contained in:
MauricioGarciaS 2023-10-19 15:16:36 +02:00 committed by GitHub
parent a75dc75aff
commit f083f0cc76
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 50 additions and 55 deletions

View file

@ -1,23 +1,25 @@
# Quickwit server image bundled with the Python Kafka consumer.
# The base image is pinned to a release so rebuilds are reproducible.
FROM quickwit/quickwit:0.6.4
WORKDIR /quickwit

# Run update + install + cleanup in a single layer: a cached `apt-get update`
# layer can otherwise go stale, and cleaning in a later layer does not shrink
# the image.
RUN apt-get update \
    && apt-get install python3 python3-pip -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the sources so this layer stays
# cached when only application files change.
COPY requirements.txt /quickwit/
RUN pip install --no-cache-dir -r requirements.txt

# Configuration templates, startup scripts and the consumer code.
COPY *.yaml /quickwit/
COPY *.sh /quickwit/
COPY consumer.py /quickwit/
COPY msgcodec /quickwit/msgcodec

# Runtime defaults; override with `docker run -e NAME=value`.
ENV filter="true" \
    encrypted="false" \
    fetch_maxsize=800 \
    graphql_maxsize=800 \
    pageevent_maxsize=800 \
    QUICKWIT_PORT=7280

EXPOSE 7280
EXPOSE 7281

# Shell form: the script is started through `/bin/sh -c`.
ENTRYPOINT ./entrypoint.sh

View file

@ -8,7 +8,7 @@ import json
from time import time, sleep
# Port of the local Quickwit REST API used by the ingest calls; read from the
# QUICKWIT_PORT setting and defaults to 7280.
QUICKWIT_PORT = config('QUICKWIT_PORT', default=7280, cast=int)
# Decryption is hard-coded off: the lookup of the 'encrypted' setting below is
# disabled, so that setting currently has no effect here.
#decryption = config('encrypted', cast=bool)
decryption = False
@ -22,12 +22,12 @@ if decryption:
def _quickwit_ingest(index, data_list, retry=0):
    """POST a batch of records to the local Quickwit ingest endpoint.

    Connection failures are retried with a linearly growing delay
    (5 s, 10 s, 15 s, ...) until more than ``max_retry`` attempts have failed.

    Args:
        index: Quickwit index id; it is interpolated into the ingest URL.
        data_list: Records to send; serialized by ``__jsonify_data``.
        retry: Number of connection failures already counted for this batch.

    Returns:
        The ``requests`` response of the first POST that did not raise a
        connection error. The HTTP status code is not inspected here.

    Raises:
        AssertionError: If the endpoint is still unreachable after
            ``max_retry`` retries. The type is kept for backward
            compatibility with callers of the earlier ``assert``-based check.
    """
    url = f'http://localhost:{QUICKWIT_PORT}/api/v1/{index}/ingest'
    while True:
        try:
            return requests.post(url, data=__jsonify_data(data_list, index))
        except requests.exceptions.ConnectionError as e:
            retry += 1
            # Raise explicitly instead of using `assert`: assert statements are
            # removed under `python -O`, which would make the retries endless.
            if retry > max_retry:
                raise AssertionError(
                    f'[ENDPOINT CONNECTION FAIL] Failed to connect to endpoint {url}\n{e}\n'
                ) from e
            # Announce the wait before sleeping so the log line is timely.
            print(f"[ENDPOINT ERROR] Failed to connect to endpoint {url}, retrying in {5*retry} seconds..\n")
            sleep(5 * retry)

View file

@ -6,22 +6,7 @@ ls config/
# Substitute the {{PLACEHOLDER}} tokens in every YAML file under /quickwit/
# with the values of the matching environment variables. Some expressions use
# '#' as the sed delimiter, presumably because those values can contain '/'.
find /quickwit/ -type f -name "*.yaml" -exec sed -i "s#{{KAFKA_SERVER}}#${KAFKA_SERVER}#g" {} \;
find /quickwit/ -type f -name "*.yaml" -exec sed -i "s#{{AWS_BUCKET}}#${AWS_BUCKET}#g" {} \;
find /quickwit/ -type f -name "*.yaml" -exec sed -i "s/{{QUICKWIT_TOPIC}}/${QUICKWIT_TOPIC}/g" {} \;
find /quickwit/ -type f -name "*.yaml" -exec sed -i "s/{{QUICKWIT_PORT}}/${QUICKWIT_PORT}/g" {} \;
find /quickwit/ -type f -name "*.yaml" -exec sed -i "s#{{data_dir_path}}#${data_dir_path}#g" {} \;

# Start the Quickwit server in the background, then run the index setup and
# Kafka consumer in the foreground. If that script ever exits successfully,
# block on the background server with `wait`; `fg` requires job control, which
# is disabled in non-interactive scripts, so it would fail instead of waiting.
./quickwit_start_task.sh & ./setup_indexes_and_worker.sh && wait

View file

@ -1,6 +0,0 @@
KAFKA_SERVER=
QUICKWIT_TOPIC=ee-quickwit
fetch_maxsize=800
graphql_maxsize=800
pageevent_maxsize=800
group_id=ee-quickwit

View file

@ -2,9 +2,10 @@
# Index config file for the fetchevent index.
#
version: 0.4
version: 0.6
index_id: fetchevent
index_id: "fetchevent"
index_uri: "s3://openreplay-quickwit/quickwit-indexes/fetchevent"
doc_mapping:
mode: dynamic
@ -57,7 +58,7 @@ doc_mapping:
timestamp_field: insertion_timestamp
search_settings:
default_search_fields: [project_id, session_id, url, request]
default_search_fields: [project_id, session_id, url]
retention:
period: 30 days

View file

@ -2,9 +2,10 @@
# Index config file for the graphql index.
#
version: 0.4
version: 0.6
index_id: graphql
index_id: "graphql"
index_uri: "s3://openreplay-quickwit/quickwit-indexes/graphql"
doc_mapping:
mode: dynamic
@ -44,7 +45,7 @@ doc_mapping:
timestamp_field: insertion_timestamp
search_settings:
default_search_fields: [project_id, session_id, operation_kind, operation_name, variables]
default_search_fields: [project_id, session_id, operation_kind, operation_name]
retention:
period: 30 days

View file

@ -2,9 +2,10 @@
# Index config file for the pageevent index.
#
version: 0.4
version: 0.6
index_id: pageevent
index_id: "pageevent"
index_uri: "s3://openreplay-quickwit/quickwit-indexes/pageevent"
doc_mapping:
mode: strict

View file

@ -0,0 +1 @@
# Launch Quickwit with the node configuration in s3-config-listen.yaml.
quickwit run --config=./s3-config-listen.yaml

View file

@ -1,4 +1,4 @@
confluent-kafka
python-decouple
requests
zstd
confluent-kafka==2.2.0
python-decouple==3.8
requests==2.31.0
zstd==1.5.5.1

View file

@ -1,6 +1,7 @@
## In order to save data into S3
# metastore also accepts s3://{bucket/path}#polling_interval={seconds}s
version: 0
metastore_uri: s3://quickwit/quickwit-indexes
default_index_root_uri: s3://quickwit/quickwit-indexes
version: 0.6
metastore_uri: s3://openreplay-quickwit/quickwit-indexes
default_index_root_uri: s3://openreplay-quickwit/quickwit-indexes
listen_address: 0.0.0.0
rest_listen_port: {{QUICKWIT_PORT}}

View file

@ -1,5 +1,6 @@
## In order to save data into S3
# metastore also accepts s3://{bucket/path}#polling_interval={seconds}s
version: 0
metastore_uri: s3://quickwit/quickwit-indexes
default_index_root_uri: s3://quickwit/quickwit-indexes
version: 0.6
metastore_uri: s3://openreplay-quickwit/quickwit-indexes
default_index_root_uri: s3://openreplay-quickwit/quickwit-indexes
rest_listen_port: {{QUICKWIT_PORT}}

View file

@ -0,0 +1,8 @@
# Create the three Quickwit indexes, then run the Kafka consumer.
# Fixed 2-minute delay before touching Quickwit — presumably to let the server
# started by the entrypoint finish booting; there is no readiness check.
sleep 120
echo "Creating indexes.."
# No --config flag is passed, so the CLI falls back to its own default
# configuration lookup. NOTE(review): confirm these commands reach the same
# metastore as the running server.
quickwit index create --index-config index-config-fetch.yaml
quickwit index create --index-config index-config-graphql.yaml
quickwit index create --index-config index-config-pageevent.yaml
echo "Running kafka reader.."
# -u: unbuffered stdout/stderr so consumer output is written immediately.
python3 -u consumer.py