From e85933e96d1dd940310c23f2782921716517b396 Mon Sep 17 00:00:00 2001 From: MauricioGarciaS <47052044+MauricioGarciaS@users.noreply.github.com> Date: Fri, 20 Jan 2023 12:25:22 +0100 Subject: [PATCH 1/3] Changed python-kafka to confluent-kafka --- ee/connectors/consumer.py | 24 +++++++++++-------- .../deploy/requirements_bigquery.txt | 2 +- .../deploy/requirements_clickhouse.txt | 2 +- ee/connectors/deploy/requirements_pg.txt | 2 +- .../deploy/requirements_redshift.txt | 2 +- .../deploy/requirements_snowflake.txt | 2 +- 6 files changed, 19 insertions(+), 15 deletions(-) diff --git a/ee/connectors/consumer.py b/ee/connectors/consumer.py index c8d61ff7b..1c3488642 100644 --- a/ee/connectors/consumer.py +++ b/ee/connectors/consumer.py @@ -1,5 +1,5 @@ import os -from kafka import KafkaConsumer +from confluent_kafka import Consumer from datetime import datetime from collections import defaultdict @@ -29,17 +29,21 @@ def main(): sessions_batch = [] codec = MessageCodec() - consumer = KafkaConsumer(security_protocol="SSL", - bootstrap_servers=[os.environ['KAFKA_SERVER_2'], - os.environ['KAFKA_SERVER_1']], - group_id=f"my_test3_connector_{DATABASE}", - auto_offset_reset="earliest", - enable_auto_commit=False - ) + consumer = Consumer({ + "security.protocol": "SSL", + "bootstrap.servers": ",".join([os.environ['KAFKA_SERVER_1'], + os.environ['KAFKA_SERVER_2']]), + "group.id": f"connector_{DATABASE}", + "auto.offset.reset": "earliest", + "enable.auto.commit": False + }) - consumer.subscribe(topics=["raw", "raw_ios"]) + consumer.subscribe(["raw", "raw_ios"]) print("Kafka consumer subscribed") - for msg in consumer: + while True: + msg.consumer.poll(1.0) + if msg is None: + continue messages = codec.decode_detailed(msg.value) session_id = codec.decode_key(msg.key) if messages is None: diff --git a/ee/connectors/deploy/requirements_bigquery.txt b/ee/connectors/deploy/requirements_bigquery.txt index d4d63c953..1474c9ef4 100644 --- a/ee/connectors/deploy/requirements_bigquery.txt +++ b/ee/connectors/deploy/requirements_bigquery.txt @@ -1,4 +1,4 @@ -kafka-python==2.0.2 +confluent-kafka psycopg2-binary==2.9.3 SQLAlchemy==1.4.43 google-cloud-bigquery diff --git a/ee/connectors/deploy/requirements_clickhouse.txt b/ee/connectors/deploy/requirements_clickhouse.txt index b21ea36c5..8853a865f 100644 --- a/ee/connectors/deploy/requirements_clickhouse.txt +++ b/ee/connectors/deploy/requirements_clickhouse.txt @@ -3,7 +3,7 @@ chardet==5.0.0 clickhouse-driver==0.2.4 clickhouse-sqlalchemy==0.2.2 idna==3.4 -kafka-python==2.0.2 +confluent-kafka pandas==1.5.1 pytz==2022.6 requests==2.28.1 diff --git a/ee/connectors/deploy/requirements_pg.txt b/ee/connectors/deploy/requirements_pg.txt index 09bf8b34d..8354e61a9 100644 --- a/ee/connectors/deploy/requirements_pg.txt +++ b/ee/connectors/deploy/requirements_pg.txt @@ -1,7 +1,7 @@ certifi==2022.09.24 chardet==5.0.0 idna==3.4 -kafka-python==2.0.2 +confluent-kafka pandas==1.5.1 psycopg2-binary==2.9.3 pytz==2022.6 diff --git a/ee/connectors/deploy/requirements_redshift.txt b/ee/connectors/deploy/requirements_redshift.txt index e4182cb92..7bd5e3f08 100644 --- a/ee/connectors/deploy/requirements_redshift.txt +++ b/ee/connectors/deploy/requirements_redshift.txt @@ -3,7 +3,7 @@ chardet==5.0.0 clickhouse-driver==0.2.4 clickhouse-sqlalchemy==0.2.2 idna==3.4 -kafka-python==2.0.2 +confluent-kafka psycopg2-binary==2.9.3 pytz==2022.6 requests==2.28.1 diff --git a/ee/connectors/deploy/requirements_snowflake.txt b/ee/connectors/deploy/requirements_snowflake.txt index 895326b32..5a3aaca99 100644 --- a/ee/connectors/deploy/requirements_snowflake.txt +++ b/ee/connectors/deploy/requirements_snowflake.txt @@ -1,5 +1,5 @@ pandas==1.5.1 -kafka-python==2.0.2 +confluent-kafka SQLAlchemy==1.4.43 snowflake-connector-python==2.8.2 snowflake-sqlalchemy==1.4.4 From f4e7321b86e82c5cc906ca3eeb9e14a663516954 Mon Sep 17 00:00:00 2001 From: MauricioGarciaS <47052044+MauricioGarciaS@users.noreply.github.com> Date: Fri, 20 Jan 2023 15:19:27 +0100 Subject: [PATCH 2/3] Changed python version to 3.11 and fixed confluent-kafka import error --- ee/connectors/deploy/Dockerfile_bigquery | 7 ++++++- ee/connectors/deploy/Dockerfile_clickhouse | 7 ++++++- ee/connectors/deploy/Dockerfile_pg | 7 ++++++- ee/connectors/deploy/Dockerfile_redshift | 7 ++++++- ee/connectors/deploy/Dockerfile_snowflake | 7 ++++++- 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/ee/connectors/deploy/Dockerfile_bigquery b/ee/connectors/deploy/Dockerfile_bigquery index 770ccf8fa..515914e15 100644 --- a/ee/connectors/deploy/Dockerfile_bigquery +++ b/ee/connectors/deploy/Dockerfile_bigquery @@ -1,8 +1,13 @@ -FROM python:3.8-slim +FROM python:3.11 WORKDIR /usr/src/app COPY . . +RUN apt update +RUN apt-get install -y libc-dev libffi-dev gcc +RUN apt update && apt -y install software-properties-common gcc +RUN git clone https://github.com/edenhill/librdkafka +RUN cd librdkafka && ./configure && make && make install && ldconfig RUN pip install -r ./deploy/requirements_bigquery.txt diff --git a/ee/connectors/deploy/Dockerfile_clickhouse b/ee/connectors/deploy/Dockerfile_clickhouse index 0b19edeb6..f2dad8f65 100644 --- a/ee/connectors/deploy/Dockerfile_clickhouse +++ b/ee/connectors/deploy/Dockerfile_clickhouse @@ -1,8 +1,13 @@ -FROM python:3.8-slim +FROM python:3.11 WORKDIR /usr/src/app COPY . . +RUN apt update +RUN apt-get install -y libc-dev libffi-dev gcc +RUN apt update && apt -y install software-properties-common gcc +RUN git clone https://github.com/edenhill/librdkafka +RUN cd librdkafka && ./configure && make && make install && ldconfig RUN pip install -r ./deploy/requirements_clickhouse.txt diff --git a/ee/connectors/deploy/Dockerfile_pg b/ee/connectors/deploy/Dockerfile_pg index 0598016cc..a8b1c0f01 100644 --- a/ee/connectors/deploy/Dockerfile_pg +++ b/ee/connectors/deploy/Dockerfile_pg @@ -1,8 +1,13 @@ -FROM python:3.8-slim +FROM python:3.11 WORKDIR /usr/src/app COPY . . +RUN apt update +RUN apt-get install -y libc-dev libffi-dev gcc +RUN apt update && apt -y install software-properties-common gcc +RUN git clone https://github.com/edenhill/librdkafka +RUN cd librdkafka && ./configure && make && make install && ldconfig RUN pip install -r ./deploy/requirements_pg.txt diff --git a/ee/connectors/deploy/Dockerfile_redshift b/ee/connectors/deploy/Dockerfile_redshift index 87e99acc7..c69739623 100644 --- a/ee/connectors/deploy/Dockerfile_redshift +++ b/ee/connectors/deploy/Dockerfile_redshift @@ -1,8 +1,13 @@ -FROM python:3.8-slim +FROM python:3.11 WORKDIR /usr/src/app COPY . . +RUN apt update +RUN apt-get install -y libc-dev libffi-dev gcc +RUN apt update && apt -y install software-properties-common gcc +RUN git clone https://github.com/edenhill/librdkafka +RUN cd librdkafka && ./configure && make && make install && ldconfig RUN pip install -r ./deploy/requirements_redshift.txt diff --git a/ee/connectors/deploy/Dockerfile_snowflake b/ee/connectors/deploy/Dockerfile_snowflake index 4eae51685..1d4b926a8 100644 --- a/ee/connectors/deploy/Dockerfile_snowflake +++ b/ee/connectors/deploy/Dockerfile_snowflake @@ -1,8 +1,13 @@ -FROM python:3.8-slim +FROM python:3.11 WORKDIR /usr/src/app COPY . . +RUN apt update +RUN apt-get install -y libc-dev libffi-dev gcc +RUN apt update && apt -y install software-properties-common gcc +RUN git clone https://github.com/edenhill/librdkafka +RUN cd librdkafka && ./configure && make && make install && ldconfig RUN pip install -r ./deploy/requirements_snowflake.txt From 4d442d045c116bf02e8e06698d61f56687f11a2d Mon Sep 17 00:00:00 2001 From: MauricioGarciaS <47052044+MauricioGarciaS@users.noreply.github.com> Date: Fri, 20 Jan 2023 15:50:37 +0100 Subject: [PATCH 3/3] Changed bigquery module version to latest --- ee/connectors/deploy/requirements_bigquery.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/connectors/deploy/requirements_bigquery.txt b/ee/connectors/deploy/requirements_bigquery.txt index 1474c9ef4..7ce437323 100644 --- a/ee/connectors/deploy/requirements_bigquery.txt +++ b/ee/connectors/deploy/requirements_bigquery.txt @@ -1,7 +1,7 @@ confluent-kafka psycopg2-binary==2.9.3 SQLAlchemy==1.4.43 -google-cloud-bigquery +google-cloud-bigquery==3.4.2 pandas==1.5.1 PyYAML pandas-gbq