diff --git a/api/chalicelib/core/jobs.py b/api/chalicelib/core/jobs.py index 2d244e366..bc5ce81ab 100644 --- a/api/chalicelib/core/jobs.py +++ b/api/chalicelib/core/jobs.py @@ -1,6 +1,6 @@ from chalicelib.utils import pg_client, helper from chalicelib.utils.TimeUTC import TimeUTC -from chalicelib.core import sessions, sessions_mobs +from chalicelib.core import sessions, sessions_mobs, sessions_devtool class Actions: @@ -17,11 +17,9 @@ class JobStatus: def get(job_id): with pg_client.PostgresClient() as cur: query = cur.mogrify( - """\ - SELECT - * - FROM public.jobs - WHERE job_id = %(job_id)s;""", + """SELECT * + FROM public.jobs + WHERE job_id = %(job_id)s;""", {"job_id": job_id} ) cur.execute(query=query) @@ -37,11 +35,9 @@ def get(job_id): def get_all(project_id): with pg_client.PostgresClient() as cur: query = cur.mogrify( - """\ - SELECT - * - FROM public.jobs - WHERE project_id = %(project_id)s;""", + """SELECT * + FROM public.jobs + WHERE project_id = %(project_id)s;""", {"project_id": project_id} ) cur.execute(query=query) @@ -59,15 +55,10 @@ def create(project_id, data): **data } - query = cur.mogrify("""\ - INSERT INTO public.jobs( - project_id, description, status, action, - reference_id, start_at - ) - VALUES ( - %(project_id)s, %(description)s, %(status)s, %(action)s, - %(reference_id)s, %(start_at)s - ) RETURNING *;""", job) + query = cur.mogrify( + """INSERT INTO public.jobs(project_id, description, status, action,reference_id, start_at) + VALUES (%(project_id)s, %(description)s, %(status)s, %(action)s,%(reference_id)s, %(start_at)s) + RETURNING *;""", job) cur.execute(query=query) @@ -90,14 +81,13 @@ def update(job_id, job): **job } - query = cur.mogrify("""\ - UPDATE public.jobs - SET - updated_at = timezone('utc'::text, now()), - status = %(status)s, - errors = %(errors)s - WHERE - job_id = %(job_id)s RETURNING *;""", job_data) + query = cur.mogrify( + """UPDATE public.jobs + SET updated_at = timezone('utc'::text, now()), + status = %(status)s, + errors = %(errors)s + WHERE job_id = %(job_id)s + RETURNING *;""", job_data) cur.execute(query=query) @@ -116,58 +106,37 @@ def format_datetime(r): def get_scheduled_jobs(): with pg_client.PostgresClient() as cur: query = cur.mogrify( - """\ - SELECT * FROM public.jobs - WHERE status = %(status)s AND start_at <= (now() at time zone 'utc');""", - {"status": JobStatus.SCHEDULED} - ) + """SELECT * + FROM public.jobs + WHERE status = %(status)s + AND start_at <= (now() at time zone 'utc');""", + {"status": JobStatus.SCHEDULED}) cur.execute(query=query) data = cur.fetchall() - for record in data: - format_datetime(record) - return helper.list_to_camel_case(data) def execute_jobs(): jobs = get_scheduled_jobs() - if len(jobs) == 0: - # No jobs to execute - return - for job in jobs: - print(f"job can be executed {job['id']}") + print(f"Executing jobId:{job['jobId']}") try: if job["action"] == Actions.DELETE_USER_DATA: - session_ids = sessions.get_session_ids_by_user_ids( - project_id=job["projectId"], user_ids=job["referenceId"] - ) - - sessions.delete_sessions_by_session_ids(session_ids) - sessions_mobs.delete_mobs(session_ids=session_ids, project_id=job["projectId"]) + session_ids = sessions.get_session_ids_by_user_ids(project_id=job["projectId"], + user_ids=[job["referenceId"]]) + if len(session_ids) > 0: + print(f"Deleting {len(session_ids)} sessions") + sessions.delete_sessions_by_session_ids(session_ids) + sessions_mobs.delete_mobs(session_ids=session_ids, project_id=job["projectId"]) + sessions_devtool.delete_mobs(session_ids=session_ids, project_id=job["projectId"]) else: - raise Exception(f"The action {job['action']} not supported.") + raise Exception(f"The action '{job['action']}' not supported.") job["status"] = JobStatus.COMPLETED - print(f"job completed {job['id']}") + print(f"Job completed {job['jobId']}") except Exception as e: job["status"] = JobStatus.FAILED - job["error"] = str(e) - print(f"job failed {job['id']}") + job["errors"] = str(e) + print(f"Job failed {job['jobId']}") - update(job["job_id"], job) - - -def group_user_ids_by_project_id(jobs, now): - project_id_user_ids = {} - for job in jobs: - if job["startAt"] > now: - continue - - project_id = job["projectId"] - if project_id not in project_id_user_ids: - project_id_user_ids[project_id] = [] - - project_id_user_ids[project_id].append(job) - - return project_id_user_ids + update(job["jobId"], job) diff --git a/api/chalicelib/core/sessions.py b/api/chalicelib/core/sessions.py index 8f98aac83..7de8253da 100644 --- a/api/chalicelib/core/sessions.py +++ b/api/chalicelib/core/sessions.py @@ -1068,23 +1068,21 @@ def get_session_user(project_id, user_id): def get_session_ids_by_user_ids(project_id, user_ids): with pg_client.PostgresClient() as cur: query = cur.mogrify( - """\ - SELECT session_id FROM public.sessions - WHERE - project_id = %(project_id)s AND user_id IN %(userId)s;""", - {"project_id": project_id, "userId": tuple(user_ids)} - ) - ids = cur.execute(query=query) - return ids + """SELECT session_id + FROM public.sessions + WHERE project_id = %(project_id)s + AND user_id IN %(userId)s;""", + {"project_id": project_id, "userId": tuple(user_ids)}) + cur.execute(query=query) + ids = cur.fetchall() + return [s["session_id"] for s in ids] def delete_sessions_by_session_ids(session_ids): with pg_client.PostgresClient(unlimited_query=True) as cur: query = cur.mogrify( - """\ - DELETE FROM public.sessions - WHERE - session_id IN %(session_ids)s;""", + """DELETE FROM public.sessions + WHERE session_id IN %(session_ids)s;""", {"session_ids": tuple(session_ids)} ) cur.execute(query=query) @@ -1092,20 +1090,6 @@ def delete_sessions_by_session_ids(session_ids): return True -def delete_sessions_by_user_ids(project_id, user_ids): - with pg_client.PostgresClient(unlimited_query=True) as cur: - query = cur.mogrify( - """\ - DELETE FROM public.sessions - WHERE - project_id = %(project_id)s AND user_id IN %(userId)s;""", - {"project_id": project_id, "userId": tuple(user_ids)} - ) - cur.execute(query=query) - - return True - - def count_all(): with pg_client.PostgresClient(unlimited_query=True) as cur: cur.execute(query="SELECT COUNT(session_id) AS count FROM public.sessions") diff --git a/api/chalicelib/core/sessions_devtool.py b/api/chalicelib/core/sessions_devtool.py index 6aab5a5e2..50af2bb39 100644 --- a/api/chalicelib/core/sessions_devtool.py +++ b/api/chalicelib/core/sessions_devtool.py @@ -24,3 +24,9 @@ def get_urls(session_id, project_id, check_existence: bool = True): ExpiresIn=config("PRESIGNED_URL_EXPIRATION", cast=int, default=900) )) return results + + +def delete_mobs(project_id, session_ids): + for session_id in session_ids: + for k in __get_devtools_keys(project_id=project_id, session_id=session_id): + s3.schedule_for_deletion(config("sessions_bucket"), k) diff --git a/api/chalicelib/core/sessions_mobs.py b/api/chalicelib/core/sessions_mobs.py index 68c64fd2b..fb9f8fa9e 100644 --- a/api/chalicelib/core/sessions_mobs.py +++ b/api/chalicelib/core/sessions_mobs.py @@ -57,5 +57,6 @@ def get_ios(session_id): def delete_mobs(project_id, session_ids): for session_id in session_ids: - for k in __get_mob_keys(project_id=project_id, session_id=session_id): + for k in __get_mob_keys(project_id=project_id, session_id=session_id) \ + + __get_mob_keys_deprecated(session_id=session_id): s3.schedule_for_deletion(config("sessions_bucket"), k) diff --git a/api/chalicelib/utils/s3.py b/api/chalicelib/utils/s3.py index 655628602..cdd22aa4e 100644 --- a/api/chalicelib/utils/s3.py +++ b/api/chalicelib/utils/s3.py @@ -110,11 +110,14 @@ def rename(source_bucket, source_key, target_bucket, target_key): def schedule_for_deletion(bucket, key): + if not exists(bucket, key): + return False s3 = __get_s3_resource() s3_object = s3.Object(bucket, key) s3_object.copy_from(CopySource={'Bucket': bucket, 'Key': key}, - Expires=datetime.now() + timedelta(days=7), + Expires=datetime.utcnow() + timedelta(days=config("SCH_DELETE_DAYS", cast=int, default=7)), MetadataDirective='REPLACE') + return True def generate_file_key(project_id, key): diff --git a/api/env.default b/api/env.default index 074d9b643..e0560619f 100644 --- a/api/env.default +++ b/api/env.default @@ -53,4 +53,5 @@ PRESIGNED_URL_EXPIRATION=3600 ASSIST_JWT_EXPIRATION=144000 ASSIST_JWT_SECRET= PYTHONUNBUFFERED=1 -REDIS_STRING=redis://redis-master.db.svc.cluster.local:6379 \ No newline at end of file +REDIS_STRING=redis://redis-master.db.svc.cluster.local:6379 +SCH_DELETE_DAYS=7 \ No newline at end of file diff --git a/ee/api/chalicelib/core/sessions_devtool.py b/ee/api/chalicelib/core/sessions_devtool.py index 9961df360..198466f65 100644 --- a/ee/api/chalicelib/core/sessions_devtool.py +++ b/ee/api/chalicelib/core/sessions_devtool.py @@ -31,3 +31,9 @@ def get_urls(session_id, project_id, context: schemas_ee.CurrentContext, check_e ExpiresIn=config("PRESIGNED_URL_EXPIRATION", cast=int, default=900) )) return results + + +def delete_mobs(project_id, session_ids): + for session_id in session_ids: + for k in __get_devtools_keys(project_id=project_id, session_id=session_id): + s3.schedule_for_deletion(config("sessions_bucket"), k) diff --git a/ee/api/chalicelib/core/sessions_exp.py b/ee/api/chalicelib/core/sessions_exp.py index 888800681..1c4d20883 100644 --- a/ee/api/chalicelib/core/sessions_exp.py +++ b/ee/api/chalicelib/core/sessions_exp.py @@ -1399,23 +1399,21 @@ def get_session_user(project_id, user_id): def get_session_ids_by_user_ids(project_id, user_ids): with pg_client.PostgresClient() as cur: query = cur.mogrify( - """\ - SELECT session_id FROM public.sessions - WHERE - project_id = %(project_id)s AND user_id IN %(userId)s;""", - {"project_id": project_id, "userId": tuple(user_ids)} - ) - ids = cur.execute(query=query) - return ids + """SELECT session_id + FROM public.sessions + WHERE project_id = %(project_id)s + AND user_id IN %(userId)s;""", + {"project_id": project_id, "userId": tuple(user_ids)}) + cur.execute(query=query) + ids = cur.fetchall() + return [s["session_id"] for s in ids] def delete_sessions_by_session_ids(session_ids): with pg_client.PostgresClient(unlimited_query=True) as cur: query = cur.mogrify( - """\ - DELETE FROM public.sessions - WHERE - session_id IN %(session_ids)s;""", + """DELETE FROM public.sessions + WHERE session_id IN %(session_ids)s;""", {"session_ids": tuple(session_ids)} ) cur.execute(query=query) @@ -1423,20 +1421,6 @@ def delete_sessions_by_session_ids(session_ids): return True -def delete_sessions_by_user_ids(project_id, user_ids): - with pg_client.PostgresClient(unlimited_query=True) as cur: - query = cur.mogrify( - """\ - DELETE FROM public.sessions - WHERE - project_id = %(project_id)s AND user_id IN %(userId)s;""", - {"project_id": project_id, "userId": tuple(user_ids)} - ) - cur.execute(query=query) - - return True - - def count_all(): with ch_client.ClickHouseClient() as cur: row = cur.execute(query=f"SELECT COUNT(session_id) AS count FROM {exp_ch_helper.get_main_sessions_table()}") diff --git a/ee/api/env.default b/ee/api/env.default index 1947e9847..603c291b0 100644 --- a/ee/api/env.default +++ b/ee/api/env.default @@ -73,4 +73,5 @@ PRESIGNED_URL_EXPIRATION=3600 ASSIST_JWT_EXPIRATION=144000 ASSIST_JWT_SECRET= KAFKA_SERVERS=kafka.db.svc.cluster.local:9092 -KAFKA_USE_SSL=false \ No newline at end of file +KAFKA_USE_SSL=false +SCH_DELETE_DAYS=7 \ No newline at end of file