fix(redshift-connector): Redshift connector update (#1538)

* Updated the SQL events table

* Updated and fixed the Redshift connector's Python code
This commit is contained in:
MauricioGarciaS 2023-10-23 17:21:05 +02:00 committed by GitHub
parent d89f3efc7a
commit 852ce7b324
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 433 additions and 470 deletions

View file

@ -1,13 +0,0 @@
FROM amancevice/pandas:2.0.2-alpine
WORKDIR app
COPY requirements-fill.txt .
RUN apk add --no-cache --virtual .build-deps gcc g++ musl-dev postgresql-dev && \
pip install -r requirements-fill.txt --no-cache-dir && \
apk --purge del .build-deps
COPY utils utils
COPY fill_from_db.py .
COPY entrypoint-fill.sh .
ENTRYPOINT ./entrypoint-fill.sh

View file

@ -86,6 +86,10 @@ class Event(Base):
consolelog_value = Column(VARCHAR(5000))
customevent_name = Column(VARCHAR(5000))
customevent_payload = Column(VARCHAR(5000))
clickevent_hesitationtime = Column(BigInteger)
clickevent_messageid = Column(BigInteger)
clickevent_label = Column(VARCHAR(5000))
clickevent_selector = Column(VARCHAR(5000))
jsexception_message = Column(VARCHAR(5000))
jsexception_name = Column(VARCHAR(5000))
jsexception_payload = Column(VARCHAR(5000))

View file

@ -5,6 +5,10 @@ dtypes_events = {
'sessionid': "Int64",
'consolelog_level': "string",
'consolelog_value': "string",
'clickevent_hesitationtime': "Int64",
'clickevent_label': "string",
'clickevent_messageid': "Int64",
'clickevent_selector': "string",
'customevent_name': "string",
'customevent_payload': "string",
'jsexception_message': "string",

View file

@ -1,23 +0,0 @@
FROM public.ecr.aws/p1t3u8a3/connectors/redshift:base
ENV CLOUD_SERVICE=redshift \
CONNECTION_STRING=postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{DBNAME} \
# Keep postgres connection
PG_MINCONN=3 \
PG_MAXCONN=10
RUN apk add --no-cache postgresql-libs lz4-libs zstd-libs
COPY deploy/requirements_redshift.txt .
COPY msgcodec msgcodec
COPY build_modules.sh .
RUN apk add --no-cache --virtual .build-deps gcc g++ musl-dev postgresql-dev && \
./build_modules.sh && python3 -m pip install -r requirements_redshift.txt --no-cache-dir && \
apk --purge del .build-deps
COPY utils utils
COPY db db
COPY sql sql
COPY handler.py .
COPY consumer_pool.py .
COPY fill_from_db.py .
COPY entrypoint.sh .
ENV replace_interval=300
ENTRYPOINT ./entrypoint.sh

View file

@ -1,7 +1,7 @@
FROM amancevice/pandas:2.0.2-alpine
FROM amancevice/pandas:alpine-2.1.1
WORKDIR /usr/src/app
ENV LIBRD_VER=2.1.1
ENV LIBRD_VER=2.2.0
WORKDIR /work
RUN apk add --no-cache --virtual .make-deps postgresql-dev gcc python3-dev \
musl-dev linux-headers g++ libc-dev libffi-dev make cmake py-pip build-base \

View file

@ -1,15 +1,15 @@
chardet==5.1.0
chardet==5.2.0
idna==3.4
confluent-kafka==2.1.1
psycopg2-binary==2.9.6
apscheduler==3.10.1
confluent-kafka==2.2.0
psycopg2-binary==2.9.9
apscheduler==3.10.4
python-decouple==3.8
pytz==2022.6
requests==2.28.1
SQLAlchemy==1.4.48
tzlocal==5.0.1
urllib3==1.26.15
pytz==2023.3.post1
requests==2.31.0
SQLAlchemy==1.4.49
tzlocal==5.2
urllib3==2.0.7
sqlalchemy-redshift==0.8.14
redshift-connector==2.0.911
redshift-connector==2.0.915
pandas-redshift==2.0.5
PyYAML==6.0
PyYAML==6.0.1

View file

@ -29,10 +29,10 @@ def handle_normal_message(message: Message) -> Optional[Event]:
return n
if isinstance(message, MouseClick):
n.mouseclick_hesitationtime = message.hesitation_time
n.mouseclick_id = message.id
n.mouseclick_label = message.label
n.mouseclick_selector = message.selector
n.clickevent_hesitationtime = message.hesitation_time
n.clickevent_messageid = message.id
n.clickevent_label = message.label
n.clickevent_selector = message.selector
return n
if isinstance(message, NetworkRequest):

File diff suppressed because it is too large Load diff

View file

@ -1,13 +0,0 @@
chardet==5.1.0
idna==3.4
psycopg2-binary==2.9.6
python-decouple==3.8
pytz==2022.6
requests==2.28.1
SQLAlchemy==1.4.48
tzlocal==5.0.1
urllib3==1.26.15
sqlalchemy-redshift==0.8.14
redshift-connector==2.0.911
pandas-redshift==2.0.5
PyYAML==6.0

View file

@ -3,6 +3,10 @@ CREATE TABLE IF NOT EXISTS connector_events
sessionid BIGINT,
consolelog_level VARCHAR(8000),
consolelog_value VARCHAR(8000),
clickevent_hesitationtime BIGINT,
clickevent_messageid BIGINT,
clickevent_label VARCHAR(8000),
clickevent_selector VARCHAR(8000),
customevent_name VARCHAR(8000),
customevent_payload VARCHAR(8000),
jsexception_message VARCHAR(8000),

View file

@ -40,7 +40,7 @@ if ssl_protocol:
session_messages = [1, 25, 28, 29, 30, 31, 32, 54, 56, 62, 69, 78, 125, 126]
if EVENT_TYPE == 'normal':
events_messages = [21, 22, 25, 27, 64, 78, 125]
events_messages = [21, 22, 25, 27, 64, 69, 78, 125]
elif EVENT_TYPE == 'detailed':
events_messages = [1, 4, 21, 22, 25, 27, 31, 32, 39, 48, 59, 64, 69, 78, 125, 126]
allowed_messages = list(set(session_messages + events_messages))