From 048a9767ac356efd7cb9ddb0417886443a972ce9 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 12 Apr 2023 13:06:38 +0100 Subject: [PATCH 1/7] feat(chalice): fixed jobs execution --- .github/workflows/api-ee.yaml | 1 + .github/workflows/api.yaml | 1 + api/chalicelib/core/jobs.py | 98 ++++++++++---------------- api/chalicelib/core/sessions.py | 36 +++------- api/chalicelib/core/sessions_mobs.py | 3 +- ee/api/chalicelib/core/sessions_exp.py | 36 +++------- 6 files changed, 61 insertions(+), 114 deletions(-) diff --git a/.github/workflows/api-ee.yaml b/.github/workflows/api-ee.yaml index f9a1730f1..ed81c8972 100644 --- a/.github/workflows/api-ee.yaml +++ b/.github/workflows/api-ee.yaml @@ -10,6 +10,7 @@ on: branches: - dev - api-* + - v1.11.0-patch paths: - "ee/api/**" - "api/**" diff --git a/.github/workflows/api.yaml b/.github/workflows/api.yaml index 8e2f7fa7b..451ae64b5 100644 --- a/.github/workflows/api.yaml +++ b/.github/workflows/api.yaml @@ -10,6 +10,7 @@ on: branches: - dev - api-* + - v1.11.0-patch paths: - "api/**" - "!api/.gitignore" diff --git a/api/chalicelib/core/jobs.py b/api/chalicelib/core/jobs.py index 2d244e366..69a777511 100644 --- a/api/chalicelib/core/jobs.py +++ b/api/chalicelib/core/jobs.py @@ -17,11 +17,9 @@ class JobStatus: def get(job_id): with pg_client.PostgresClient() as cur: query = cur.mogrify( - """\ - SELECT - * - FROM public.jobs - WHERE job_id = %(job_id)s;""", + """SELECT * + FROM public.jobs + WHERE job_id = %(job_id)s;""", {"job_id": job_id} ) cur.execute(query=query) @@ -37,11 +35,9 @@ def get(job_id): def get_all(project_id): with pg_client.PostgresClient() as cur: query = cur.mogrify( - """\ - SELECT - * - FROM public.jobs - WHERE project_id = %(project_id)s;""", + """SELECT * + FROM public.jobs + WHERE project_id = %(project_id)s;""", {"project_id": project_id} ) cur.execute(query=query) @@ -59,15 +55,10 @@ def create(project_id, data): **data } - query = cur.mogrify("""\ - INSERT INTO public.jobs( - project_id, description, status, action, - reference_id, start_at - ) - VALUES ( - %(project_id)s, %(description)s, %(status)s, %(action)s, - %(reference_id)s, %(start_at)s - ) RETURNING *;""", job) + query = cur.mogrify( + """INSERT INTO public.jobs(project_id, description, status, action,reference_id, start_at) + VALUES (%(project_id)s, %(description)s, %(status)s, %(action)s,%(reference_id)s, %(start_at)s) + RETURNING *;""", job) cur.execute(query=query) @@ -90,14 +81,13 @@ def update(job_id, job): **job } - query = cur.mogrify("""\ - UPDATE public.jobs - SET - updated_at = timezone('utc'::text, now()), - status = %(status)s, - errors = %(errors)s - WHERE - job_id = %(job_id)s RETURNING *;""", job_data) + query = cur.mogrify( + """UPDATE public.jobs + SET updated_at = timezone('utc'::text, now()), + status = %(status)s, + errors = %(errors)s + WHERE job_id = %(job_id)s + RETURNING *;""", job_data) cur.execute(query=query) @@ -116,11 +106,12 @@ def format_datetime(r): def get_scheduled_jobs(): with pg_client.PostgresClient() as cur: query = cur.mogrify( - """\ - SELECT * FROM public.jobs - WHERE status = %(status)s AND start_at <= (now() at time zone 'utc');""", - {"status": JobStatus.SCHEDULED} - ) + """SELECT * FROM public.jobs + WHERE status = %(status)s AND start_at <= (now() at time zone 'utc');""", + {"status": JobStatus.SCHEDULED}) + print("------------------") + print(query) + print("------------------") cur.execute(query=query) data = cur.fetchall() for record in data: @@ -131,43 +122,28 @@ def get_scheduled_jobs(): def execute_jobs(): jobs = get_scheduled_jobs() - if len(jobs) == 0: - # No jobs to execute - return for job in jobs: - print(f"job can be executed {job['id']}") + print(f"Executing jobId:{job['jobId']}") try: if job["action"] == Actions.DELETE_USER_DATA: - session_ids = sessions.get_session_ids_by_user_ids( - project_id=job["projectId"], user_ids=job["referenceId"] - ) - - sessions.delete_sessions_by_session_ids(session_ids) - sessions_mobs.delete_mobs(session_ids=session_ids, project_id=job["projectId"]) + session_ids = sessions.get_session_ids_by_user_ids(project_id=job["projectId"], + user_ids=[job["referenceId"]]) + if len(session_ids) > 0: + print(f"Deleting {len(session_ids)} sessions") + sessions.delete_sessions_by_session_ids(session_ids) + sessions_mobs.delete_mobs(session_ids=session_ids, project_id=job["projectId"]) else: - raise Exception(f"The action {job['action']} not supported.") + raise Exception(f"The action '{job['action']}' not supported.") job["status"] = JobStatus.COMPLETED - print(f"job completed {job['id']}") + print(f"Job completed {job['jobId']}") except Exception as e: + print("-----") + print(e) + print("-----") job["status"] = JobStatus.FAILED job["error"] = str(e) - print(f"job failed {job['id']}") + print(f"Job failed {job['jobId']}") - update(job["job_id"], job) - - -def group_user_ids_by_project_id(jobs, now): - project_id_user_ids = {} - for job in jobs: - if job["startAt"] > now: - continue - - project_id = job["projectId"] - if project_id not in project_id_user_ids: - project_id_user_ids[project_id] = [] - - project_id_user_ids[project_id].append(job) - - return project_id_user_ids + update(job["jobId"], job) diff --git a/api/chalicelib/core/sessions.py b/api/chalicelib/core/sessions.py index 8f98aac83..7de8253da 100644 --- a/api/chalicelib/core/sessions.py +++ b/api/chalicelib/core/sessions.py @@ -1068,23 +1068,21 @@ def get_session_user(project_id, user_id): def get_session_ids_by_user_ids(project_id, user_ids): with pg_client.PostgresClient() as cur: query = cur.mogrify( - """\ - SELECT session_id FROM public.sessions - WHERE - project_id = %(project_id)s AND user_id IN %(userId)s;""", - {"project_id": project_id, "userId": tuple(user_ids)} - ) - ids = cur.execute(query=query) - return ids + """SELECT session_id + FROM public.sessions + WHERE project_id = %(project_id)s + AND user_id IN %(userId)s;""", + {"project_id": project_id, "userId": tuple(user_ids)}) + cur.execute(query=query) + ids = cur.fetchall() + return [s["session_id"] for s in ids] def delete_sessions_by_session_ids(session_ids): with pg_client.PostgresClient(unlimited_query=True) as cur: query = cur.mogrify( - """\ - DELETE FROM public.sessions - WHERE - session_id IN %(session_ids)s;""", + """DELETE FROM public.sessions + WHERE session_id IN %(session_ids)s;""", {"session_ids": tuple(session_ids)} ) cur.execute(query=query) @@ -1092,20 +1090,6 @@ def delete_sessions_by_session_ids(session_ids): return True -def delete_sessions_by_user_ids(project_id, user_ids): - with pg_client.PostgresClient(unlimited_query=True) as cur: - query = cur.mogrify( - """\ - DELETE FROM public.sessions - WHERE - project_id = %(project_id)s AND user_id IN %(userId)s;""", - {"project_id": project_id, "userId": tuple(user_ids)} - ) - cur.execute(query=query) - - return True - - def count_all(): with pg_client.PostgresClient(unlimited_query=True) as cur: cur.execute(query="SELECT COUNT(session_id) AS count FROM public.sessions") diff --git a/api/chalicelib/core/sessions_mobs.py b/api/chalicelib/core/sessions_mobs.py index 68c64fd2b..fb9f8fa9e 100644 --- a/api/chalicelib/core/sessions_mobs.py +++ b/api/chalicelib/core/sessions_mobs.py @@ -57,5 +57,6 @@ def get_ios(session_id): def delete_mobs(project_id, session_ids): for session_id in session_ids: - for k in __get_mob_keys(project_id=project_id, session_id=session_id): + for k in __get_mob_keys(project_id=project_id, session_id=session_id) \ + + __get_mob_keys_deprecated(session_id=session_id): s3.schedule_for_deletion(config("sessions_bucket"), k) diff --git a/ee/api/chalicelib/core/sessions_exp.py b/ee/api/chalicelib/core/sessions_exp.py index 888800681..1c4d20883 100644 --- a/ee/api/chalicelib/core/sessions_exp.py +++ b/ee/api/chalicelib/core/sessions_exp.py @@ -1399,23 +1399,21 @@ def get_session_user(project_id, user_id): def get_session_ids_by_user_ids(project_id, user_ids): with pg_client.PostgresClient() as cur: query = cur.mogrify( - """\ - SELECT session_id FROM public.sessions - WHERE - project_id = %(project_id)s AND user_id IN %(userId)s;""", - {"project_id": project_id, "userId": tuple(user_ids)} - ) - ids = cur.execute(query=query) - return ids + """SELECT session_id + FROM public.sessions + WHERE project_id = %(project_id)s + AND user_id IN %(userId)s;""", + {"project_id": project_id, "userId": tuple(user_ids)}) + cur.execute(query=query) + ids = cur.fetchall() + return [s["session_id"] for s in ids] def delete_sessions_by_session_ids(session_ids): with pg_client.PostgresClient(unlimited_query=True) as cur: query = cur.mogrify( - """\ - DELETE FROM public.sessions - WHERE - session_id IN %(session_ids)s;""", + """DELETE FROM public.sessions + WHERE session_id IN %(session_ids)s;""", {"session_ids": tuple(session_ids)} ) cur.execute(query=query) @@ -1423,20 +1421,6 @@ def delete_sessions_by_session_ids(session_ids): return True -def delete_sessions_by_user_ids(project_id, user_ids): - with pg_client.PostgresClient(unlimited_query=True) as cur: - query = cur.mogrify( - """\ - DELETE FROM public.sessions - WHERE - project_id = %(project_id)s AND user_id IN %(userId)s;""", - {"project_id": project_id, "userId": tuple(user_ids)} - ) - cur.execute(query=query) - - return True - - def count_all(): with ch_client.ClickHouseClient() as cur: row = cur.execute(query=f"SELECT COUNT(session_id) AS count FROM {exp_ch_helper.get_main_sessions_table()}") From b82be4c540e56eea2326f11defee507fa7d4cb94 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 12 Apr 2023 13:37:16 +0100 Subject: [PATCH 2/7] feat(chalice): debugging jobs execution --- api/chalicelib/core/jobs.py | 12 ++++-------- api/chalicelib/core/sessions.py | 3 +++ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/api/chalicelib/core/jobs.py b/api/chalicelib/core/jobs.py index 69a777511..783cbad2c 100644 --- a/api/chalicelib/core/jobs.py +++ b/api/chalicelib/core/jobs.py @@ -106,17 +106,13 @@ def format_datetime(r): def get_scheduled_jobs(): with pg_client.PostgresClient() as cur: query = cur.mogrify( - """SELECT * FROM public.jobs - WHERE status = %(status)s AND start_at <= (now() at time zone 'utc');""", + """SELECT * + FROM public.jobs + WHERE status = %(status)s + AND start_at <= (now() at time zone 'utc');""", {"status": JobStatus.SCHEDULED}) - print("------------------") - print(query) - print("------------------") cur.execute(query=query) data = cur.fetchall() - for record in data: - format_datetime(record) - return helper.list_to_camel_case(data) diff --git a/api/chalicelib/core/sessions.py b/api/chalicelib/core/sessions.py index 7de8253da..9edaa280c 100644 --- a/api/chalicelib/core/sessions.py +++ b/api/chalicelib/core/sessions.py @@ -1073,6 +1073,9 @@ def get_session_ids_by_user_ids(project_id, user_ids): WHERE project_id = %(project_id)s AND user_id IN %(userId)s;""", {"project_id": project_id, "userId": tuple(user_ids)}) + print("----------") + print(query) + print("----------") cur.execute(query=query) ids = cur.fetchall() return [s["session_id"] for s in ids] From 82e2856d9915c481d89ec72ab8ddeacb96069c9b Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 12 Apr 2023 13:48:52 +0100 Subject: [PATCH 3/7] feat(chalice): debugging jobs execution --- api/chalicelib/core/jobs.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/chalicelib/core/jobs.py b/api/chalicelib/core/jobs.py index 783cbad2c..88554e314 100644 --- a/api/chalicelib/core/jobs.py +++ b/api/chalicelib/core/jobs.py @@ -111,13 +111,18 @@ def get_scheduled_jobs(): WHERE status = %(status)s AND start_at <= (now() at time zone 'utc');""", {"status": JobStatus.SCHEDULED}) + print(query) cur.execute(query=query) data = cur.fetchall() + print(">>>") + print(data) return helper.list_to_camel_case(data) def execute_jobs(): + print(">>> looking for jobs to execute") jobs = get_scheduled_jobs() + print(jobs) for job in jobs: print(f"Executing jobId:{job['jobId']}") From 4113ffaa3b52d8d9c07fb547dff8ccbb42360eac Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 12 Apr 2023 14:07:47 +0100 Subject: [PATCH 4/7] feat(chalice): debugging jobs execution --- api/chalicelib/core/jobs.py | 6 ------ api/chalicelib/utils/s3.py | 3 +++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/api/chalicelib/core/jobs.py b/api/chalicelib/core/jobs.py index 88554e314..0c78859be 100644 --- a/api/chalicelib/core/jobs.py +++ b/api/chalicelib/core/jobs.py @@ -111,19 +111,13 @@ def get_scheduled_jobs(): WHERE status = %(status)s AND start_at <= (now() at time zone 'utc');""", {"status": JobStatus.SCHEDULED}) - print(query) cur.execute(query=query) data = cur.fetchall() - print(">>>") - print(data) return helper.list_to_camel_case(data) def execute_jobs(): - print(">>> looking for jobs to execute") jobs = get_scheduled_jobs() - print(jobs) - for job in jobs: print(f"Executing jobId:{job['jobId']}") try: diff --git a/api/chalicelib/utils/s3.py b/api/chalicelib/utils/s3.py index 655628602..d1acff558 100644 --- a/api/chalicelib/utils/s3.py +++ b/api/chalicelib/utils/s3.py @@ -110,11 +110,14 @@ def rename(source_bucket, source_key, target_bucket, target_key): def schedule_for_deletion(bucket, key): + if not exists(bucket, key): + return False s3 = __get_s3_resource() s3_object = s3.Object(bucket, key) s3_object.copy_from(CopySource={'Bucket': bucket, 'Key': key}, Expires=datetime.now() + timedelta(days=7), MetadataDirective='REPLACE') + return True def generate_file_key(project_id, key): From e25bfabba0b4a6f56e27956581075efcd5d0d73e Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 12 Apr 2023 14:36:51 +0100 Subject: [PATCH 5/7] feat(chalice): fixing jobs execution --- api/chalicelib/core/jobs.py | 8 +++----- api/chalicelib/core/sessions.py | 3 --- api/chalicelib/core/sessions_devtool.py | 6 ++++++ ee/api/chalicelib/core/sessions_devtool.py | 6 ++++++ 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/api/chalicelib/core/jobs.py b/api/chalicelib/core/jobs.py index 0c78859be..bc5ce81ab 100644 --- a/api/chalicelib/core/jobs.py +++ b/api/chalicelib/core/jobs.py @@ -1,6 +1,6 @@ from chalicelib.utils import pg_client, helper from chalicelib.utils.TimeUTC import TimeUTC -from chalicelib.core import sessions, sessions_mobs +from chalicelib.core import sessions, sessions_mobs, sessions_devtool class Actions: @@ -128,17 +128,15 @@ def execute_jobs(): print(f"Deleting {len(session_ids)} sessions") sessions.delete_sessions_by_session_ids(session_ids) sessions_mobs.delete_mobs(session_ids=session_ids, project_id=job["projectId"]) + sessions_devtool.delete_mobs(session_ids=session_ids, project_id=job["projectId"]) else: raise Exception(f"The action '{job['action']}' not supported.") job["status"] = JobStatus.COMPLETED print(f"Job completed {job['jobId']}") except Exception as e: - print("-----") - print(e) - print("-----") job["status"] = JobStatus.FAILED - job["error"] = str(e) + job["errors"] = str(e) print(f"Job failed {job['jobId']}") update(job["jobId"], job) diff --git a/api/chalicelib/core/sessions.py b/api/chalicelib/core/sessions.py index 9edaa280c..7de8253da 100644 --- a/api/chalicelib/core/sessions.py +++ b/api/chalicelib/core/sessions.py @@ -1073,9 +1073,6 @@ def get_session_ids_by_user_ids(project_id, user_ids): WHERE project_id = %(project_id)s AND user_id IN %(userId)s;""", {"project_id": project_id, "userId": tuple(user_ids)}) - print("----------") - print(query) - print("----------") cur.execute(query=query) ids = cur.fetchall() return [s["session_id"] for s in ids] diff --git a/api/chalicelib/core/sessions_devtool.py b/api/chalicelib/core/sessions_devtool.py index 6aab5a5e2..50af2bb39 100644 --- a/api/chalicelib/core/sessions_devtool.py +++ b/api/chalicelib/core/sessions_devtool.py @@ -24,3 +24,9 @@ def get_urls(session_id, project_id, check_existence: bool = True): ExpiresIn=config("PRESIGNED_URL_EXPIRATION", cast=int, default=900) )) return results + + +def delete_mobs(project_id, session_ids): + for session_id in session_ids: + for k in __get_devtools_keys(project_id=project_id, session_id=session_id): + s3.schedule_for_deletion(config("sessions_bucket"), k) diff --git a/ee/api/chalicelib/core/sessions_devtool.py b/ee/api/chalicelib/core/sessions_devtool.py index 9961df360..198466f65 100644 --- a/ee/api/chalicelib/core/sessions_devtool.py +++ b/ee/api/chalicelib/core/sessions_devtool.py @@ -31,3 +31,9 @@ def get_urls(session_id, project_id, context: schemas_ee.CurrentContext, check_e ExpiresIn=config("PRESIGNED_URL_EXPIRATION", cast=int, default=900) )) return results + + +def delete_mobs(project_id, session_ids): + for session_id in session_ids: + for k in __get_devtools_keys(project_id=project_id, session_id=session_id): + s3.schedule_for_deletion(config("sessions_bucket"), k) From 5c0faea838077e86c5bd1c3f773e7414f2fd9076 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 12 Apr 2023 14:51:15 +0100 Subject: [PATCH 6/7] feat(chalice): configurable mobs expiration --- api/chalicelib/utils/s3.py | 2 +- api/env.default | 3 ++- ee/api/env.default | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/api/chalicelib/utils/s3.py b/api/chalicelib/utils/s3.py index d1acff558..cdd22aa4e 100644 --- a/api/chalicelib/utils/s3.py +++ b/api/chalicelib/utils/s3.py @@ -115,7 +115,7 @@ def schedule_for_deletion(bucket, key): s3 = __get_s3_resource() s3_object = s3.Object(bucket, key) s3_object.copy_from(CopySource={'Bucket': bucket, 'Key': key}, - Expires=datetime.now() + timedelta(days=7), + Expires=datetime.utcnow() + timedelta(days=config("SCH_DELETE_DAYS", cast=int, default=7)), MetadataDirective='REPLACE') return True diff --git a/api/env.default b/api/env.default index 074d9b643..e0560619f 100644 --- a/api/env.default +++ b/api/env.default @@ -53,4 +53,5 @@ PRESIGNED_URL_EXPIRATION=3600 ASSIST_JWT_EXPIRATION=144000 ASSIST_JWT_SECRET= PYTHONUNBUFFERED=1 -REDIS_STRING=redis://redis-master.db.svc.cluster.local:6379 \ No newline at end of file +REDIS_STRING=redis://redis-master.db.svc.cluster.local:6379 +SCH_DELETE_DAYS=7 \ No newline at end of file diff --git a/ee/api/env.default b/ee/api/env.default index 1947e9847..603c291b0 100644 --- a/ee/api/env.default +++ b/ee/api/env.default @@ -73,4 +73,5 @@ PRESIGNED_URL_EXPIRATION=3600 ASSIST_JWT_EXPIRATION=144000 ASSIST_JWT_SECRET= KAFKA_SERVERS=kafka.db.svc.cluster.local:9092 -KAFKA_USE_SSL=false \ No newline at end of file +KAFKA_USE_SSL=false +SCH_DELETE_DAYS=7 \ No newline at end of file From 0d01afbcb54f5c10b547a3b9d3ea0daa796861e4 Mon Sep 17 00:00:00 2001 From: Taha Yassine Kraiem Date: Wed, 12 Apr 2023 15:07:23 +0100 Subject: [PATCH 7/7] feat(chalice): changes --- .github/workflows/api-ee.yaml | 1 - .github/workflows/api.yaml | 1 - 2 files changed, 2 deletions(-) diff --git a/.github/workflows/api-ee.yaml b/.github/workflows/api-ee.yaml index ed81c8972..f9a1730f1 100644 --- a/.github/workflows/api-ee.yaml +++ b/.github/workflows/api-ee.yaml @@ -10,7 +10,6 @@ on: branches: - dev - api-* - - v1.11.0-patch paths: - "ee/api/**" - "api/**" diff --git a/.github/workflows/api.yaml b/.github/workflows/api.yaml index 451ae64b5..8e2f7fa7b 100644 --- a/.github/workflows/api.yaml +++ b/.github/workflows/api.yaml @@ -10,7 +10,6 @@ on: branches: - dev - api-* - - v1.11.0-patch paths: - "api/**" - "!api/.gitignore"