Health checks: return a generic failure payload and log the cause instead of exposing it.

The same change is applied to api/ and ee/api/ health.py. Each check returns a
fixed fail_response on error and prints the cause; version, schema and error
text are no longer added to the "details" of the response. The "chalice" entry
now uses __always_healthy and __always_healthy_with_version is removed.

diff --git a/api/chalicelib/core/health.py b/api/chalicelib/core/health.py
index a9a54977c..139335b1a 100644
--- a/api/chalicelib/core/health.py
+++ b/api/chalicelib/core/health.py
@@ -46,16 +46,32 @@ else:
 
 
 def __check_database_pg():
+    fail_response = {
+        "health": False,
+        "details": {
+            "errors": ["Postgres health-check failed"]
+        }
+    }
     with pg_client.PostgresClient() as cur:
-        cur.execute("SHOW server_version;")
-        server_version = cur.fetchone()
-        cur.execute("SELECT openreplay_version() AS version;")
-        schema_version = cur.fetchone()
+        try:
+            cur.execute("SHOW server_version;")
+            server_version = cur.fetchone()
+        except Exception as e:
+            print("!! health failed: postgres not responding")
+            print(str(e))
+            return fail_response
+        try:
+            cur.execute("SELECT openreplay_version() AS version;")
+            schema_version = cur.fetchone()
+        except Exception as e:
+            print("!! health failed: openreplay_version not defined")
+            print(str(e))
+            return fail_response
     return {
         "health": True,
         "details": {
-            "version": server_version["server_version"],
-            "schema": schema_version["version"]
+            # "version": server_version["server_version"],
+            # "schema": schema_version["version"]
         }
     }
 
@@ -71,13 +87,6 @@ def __always_healthy():
     }
 
 
-def __always_healthy_with_version():
-    return {
-        "health": True,
-        "details": {"version": config("version_number", default="unknown")}
-    }
-
-
 def __check_be_service(service_name):
     def fn():
         fail_response = {
@@ -91,21 +100,21 @@ def __check_be_service(service_name):
             if results.status_code != 200:
                 print(f"!! issue with the storage-health code:{results.status_code}")
                 print(results.text)
-                fail_response["details"]["errors"].append(results.text)
+                # fail_response["details"]["errors"].append(results.text)
                 return fail_response
         except requests.exceptions.Timeout:
             print(f"!! Timeout getting {service_name}-health")
-            fail_response["details"]["errors"].append("timeout")
+            # fail_response["details"]["errors"].append("timeout")
             return fail_response
         except Exception as e:
             print("!! Issue getting storage-health response")
             print(str(e))
             try:
                 print(results.text)
-                fail_response["details"]["errors"].append(results.text)
+                # fail_response["details"]["errors"].append(results.text)
             except:
                 print("couldn't get response")
-                fail_response["details"]["errors"].append(str(e))
+                # fail_response["details"]["errors"].append(str(e))
             return fail_response
         return {
             "health": True,
@@ -121,7 +130,7 @@ def __check_redis():
         "details": {"errors": ["server health-check failed"]}
     }
     if config("REDIS_STRING", default=None) is None:
-        fail_response["details"]["errors"].append("REDIS_STRING not defined in env-vars")
+        # fail_response["details"]["errors"].append("REDIS_STRING not defined in env-vars")
         return fail_response
 
     try:
@@ -131,12 +140,14 @@ def __check_redis():
     except Exception as e:
         print("!! Issue getting redis-health response")
         print(str(e))
-        fail_response["details"]["errors"].append(str(e))
+        # fail_response["details"]["errors"].append(str(e))
         return fail_response
 
     return {
         "health": True,
-        "details": {"version": r.execute_command('INFO')['redis_version']}
+        "details": {
+            # "version": r.execute_command('INFO')['redis_version']
+        }
     }
 
 
@@ -152,7 +163,7 @@ def get_health():
         "alerts": __check_be_service("alerts"),
         "assets": __check_be_service("assets"),
         "assist": __check_be_service("assist"),
-        "chalice": __always_healthy_with_version,
+        "chalice": __always_healthy,
         "db": __check_be_service("db"),
         "ender": __check_be_service("ender"),
         "frontend": __always_healthy,
diff --git a/ee/api/chalicelib/core/health.py b/ee/api/chalicelib/core/health.py
index 4b95888d1..a14458193 100644
--- a/ee/api/chalicelib/core/health.py
+++ b/ee/api/chalicelib/core/health.py
@@ -48,16 +48,32 @@ else:
 
 
 def __check_database_pg():
+    fail_response = {
+        "health": False,
+        "details": {
+            "errors": ["Postgres health-check failed"]
+        }
+    }
     with pg_client.PostgresClient() as cur:
-        cur.execute("SHOW server_version;")
-        server_version = cur.fetchone()
-        cur.execute("SELECT openreplay_version() AS version;")
-        schema_version = cur.fetchone()
+        try:
+            cur.execute("SHOW server_version;")
+            server_version = cur.fetchone()
+        except Exception as e:
+            print("!! health failed: postgres not responding")
+            print(str(e))
+            return fail_response
+        try:
+            cur.execute("SELECT openreplay_version() AS version;")
+            schema_version = cur.fetchone()
+        except Exception as e:
+            print("!! health failed: openreplay_version not defined")
+            print(str(e))
+            return fail_response
     return {
         "health": True,
         "details": {
-            "version": server_version["server_version"],
-            "schema": schema_version["version"]
+            # "version": server_version["server_version"],
+            # "schema": schema_version["version"]
         }
     }
 
@@ -73,13 +89,6 @@ def __always_healthy():
     }
 
 
-def __always_healthy_with_version():
-    return {
-        "health": True,
-        "details": {"version": config("version_number", default="unknown")}
-    }
-
-
 def __check_be_service(service_name):
     def fn():
         fail_response = {
@@ -93,21 +102,21 @@ def __check_be_service(service_name):
             if results.status_code != 200:
                 print(f"!! issue with the storage-health code:{results.status_code}")
                 print(results.text)
-                fail_response["details"]["errors"].append(results.text)
+                # fail_response["details"]["errors"].append(results.text)
                 return fail_response
         except requests.exceptions.Timeout:
             print(f"!! Timeout getting {service_name}-health")
-            fail_response["details"]["errors"].append("timeout")
+            # fail_response["details"]["errors"].append("timeout")
             return fail_response
         except Exception as e:
             print("!! Issue getting storage-health response")
             print(str(e))
             try:
                 print(results.text)
-                fail_response["details"]["errors"].append(results.text)
+                # fail_response["details"]["errors"].append(results.text)
             except:
                 print("couldn't get response")
-                fail_response["details"]["errors"].append(str(e))
+                # fail_response["details"]["errors"].append(str(e))
             return fail_response
         return {
             "health": True,
@@ -123,7 +132,7 @@ def __check_redis():
         "details": {"errors": ["server health-check failed"]}
     }
     if config("REDIS_STRING", default=None) is None:
-        fail_response["details"]["errors"].append("REDIS_STRING not defined in env-vars")
+        # fail_response["details"]["errors"].append("REDIS_STRING not defined in env-vars")
         return fail_response
 
     try:
@@ -133,12 +142,14 @@ def __check_redis():
     except Exception as e:
         print("!! Issue getting redis-health response")
         print(str(e))
-        fail_response["details"]["errors"].append(str(e))
+        # fail_response["details"]["errors"].append(str(e))
         return fail_response
 
     return {
         "health": True,
-        "details": {"version": r.execute_command('INFO')['redis_version']}
+        "details": {
+            # "version": r.execute_command('INFO')['redis_version']
+        }
     }
 
 
@@ -157,7 +168,7 @@ def get_health():
         "alerts": __check_be_service("alerts"),
         "assets": __check_be_service("assets"),
         "assist": __check_be_service("assist"),
-        "chalice": __always_healthy_with_version,
+        "chalice": __always_healthy,
         "db": __check_be_service("db"),
         "ender": __check_be_service("ender"),
         "frontend": __always_healthy,
@@ -179,9 +190,18 @@ def get_health():
 
 
 def __check_database_ch():
-    errors = {}
+    fail_response = {
+        "health": False,
+        "details": {"errors": ["server health-check failed"]}
+    }
     with ch_client.ClickHouseClient() as ch:
-        server_version = ch.execute("SELECT version() AS server_version;")
+        try:
+            server_version = ch.execute("SELECT version() AS server_version;")
+        except Exception as e:
+            print("!! health failed: clickhouse not responding")
+            print(str(e))
+            return fail_response
+
         schema_version = ch.execute("""SELECT 1
                                        FROM system.functions
                                        WHERE name = 'openreplay_version';""")
@@ -189,18 +209,19 @@ def __check_database_ch():
             schema_version = ch.execute("SELECT openreplay_version()() AS version;")
             schema_version = schema_version[0]["version"]
         else:
+            print("!! health failed: clickhouse schema is outdated")
             schema_version = "unknown"
-            errors = {"errors": ["clickhouse schema is outdated"]}
+            # fail_response["details"]["errors"].append("clickhouse schema is outdated")
+            return fail_response
     return {
         "health": True,
         "details": {
-            "version": server_version[0]["server_version"],
-            "schema": schema_version,
-            **errors
+            # "version": server_version[0]["server_version"],
+            # "schema": schema_version,
+            # **errors
         }
     }
 
-
 # def __check_kafka():
 #     fail_response = {
 #         "health": False,