feat(chalice): health-check test

This commit is contained in:
Taha Yassine Kraiem 2023-03-07 16:10:08 +01:00
parent 071b2e77f4
commit 1d06e651ea
9 changed files with 348 additions and 2 deletions

View file

@ -12,7 +12,7 @@ from chalicelib.utils import pg_client
from routers import core, core_dynamic
from routers.crons import core_crons
from routers.crons import core_dynamic_crons
from routers.subs import insights, metrics, v1_api
from routers.subs import insights, metrics, v1_api, health
app = FastAPI(root_path="/api", docs_url=config("docs_url", default=""), redoc_url=config("redoc_url", default=""))
app.add_middleware(GZipMiddleware, minimum_size=1000)
@ -51,6 +51,9 @@ app.include_router(core_dynamic.app_apikey)
app.include_router(metrics.app)
app.include_router(insights.app)
app.include_router(v1_api.app_apikey)
app.include_router(health.public_app)
app.include_router(health.app)
app.include_router(health.app_apikey)
loglevel = config("LOGLEVEL", default=logging.INFO)
print(f">Loglevel set to: {loglevel}")

View file

@ -0,0 +1,148 @@
import requests
from decouple import config
from chalicelib.utils import pg_client
# URL of the metrics endpoint probed for each service, keyed by service name.
# With LOCAL_DEV enabled every service resolves to the same loopback address;
# otherwise each service is addressed by its own in-cluster DNS name.
if config("LOCAL_DEV", cast=bool, default=False):
    _endpoint_template = "http://127.0.0.1:8888/metrics"
else:
    _endpoint_template = "http://{service}-openreplay.app.svc.cluster.local:8888/metrics"

HEALTH_ENDPOINTS = {
    service: _endpoint_template.format(service=service)
    for service in (
        "alerts",
        "assets",
        "assist",
        "chalice",
        "db",
        "ender",
        "frontend",
        "heuristics",
        "http",
        "ingress-nginx",
        "integrations",
        "peers",
        "quickwit",
        "sink",
        "sourcemapreader",
        "storage",
        "utilities",
    )
}
def __check_database_pg():
    """Report PostgreSQL health with the server and schema versions.

    Runs ``SHOW server_version`` and ``SELECT openreplay_version()`` through
    ``pg_client.PostgresClient``.

    Returns:
        dict: ``{"health": True, "details": {"version": ..., "schema": ...}}``
        when both queries succeed; ``{"health": False, "details":
        {"errors": [...]}}`` when connecting or querying raises.
    """
    try:
        with pg_client.PostgresClient() as cur:
            cur.execute("SHOW server_version;")
            server_version = cur.fetchone()
            cur.execute("SELECT openreplay_version() AS version;")
            schema_version = cur.fetchone()
    except Exception as e:
        # A health probe has to describe the outage; letting the exception
        # escape would fail the whole report instead of flagging this entry.
        print("!! Issue getting postgres-health response")
        print(str(e))
        return {
            "health": False,
            "details": {
                "errors": ["postgres health-check failed"]
            }
        }
    return {
        "health": True,
        "details": {
            "version": server_version["server_version"],
            "schema": schema_version["version"]
        }
    }
def __not_supported():
    """Placeholder probe: always report a single "not supported" error."""
    messages = ["not supported"]
    return {"errors": messages}
def check_be_service(service_name):
    """Build a zero-argument health probe for one backend service.

    Args:
        service_name: key into ``HEALTH_ENDPOINTS`` selecting the URL to probe.

    Returns:
        callable: a function that issues a GET request (2 second timeout) to
        the service endpoint and returns ``{"health": True, "details": {}}``
        on HTTP 200, or ``{"health": False, "details": {"errors": [...]}}``
        on a non-200 status, a timeout or any other request error.
    """

    def fn():
        fail_response = {
            "health": False,
            "details": {
                "errors": ["server health-check failed"]
            }
        }
        # Bound up-front so the generic handler can tell whether a response
        # was received, instead of relying on a NameError being swallowed.
        results = None
        try:
            results = requests.get(HEALTH_ENDPOINTS.get(service_name), timeout=2)
            if results.status_code != 200:
                # Name the probed service; the message used to say "storage"
                # for every service.
                print(f"!! issue with the {service_name}-health code:{results.status_code}")
                print(results.text)
                fail_response["details"]["errors"].append(results.text)
                return fail_response
        except requests.exceptions.Timeout:
            print(f"!! Timeout getting {service_name}-health")
            fail_response["details"]["errors"].append("timeout")
            return fail_response
        except Exception as e:
            print(f"!! Issue getting {service_name}-health response")
            print(str(e))
            if results is None:
                # The request itself failed (connection error, bad URL, ...).
                print("couldn't get response")
                fail_response["details"]["errors"].append(str(e))
            else:
                print("expected JSON, received:")
                print(results.text)
                fail_response["details"]["errors"].append(results.text)
            return fail_response
        return {
            "health": True,
            "details": {}
        }

    return fn
def get_health():
    """Run every health probe and return the results grouped by section.

    Returns:
        dict: ``{"databases": {...}, "ingestionPipeline": {...},
        "backendServices": {...}}`` where each inner value is the result
        returned by that component's probe.
    """
    backend_services = (
        "alerts",
        "assets",
        "assist",
        "chalice",
        "db",
        "ender",
        "frontend",
        "heuristics",
        "http",
        "ingress-nginx",
        "integrations",
        "peers",
        "quickwit",
        "sink",
        "sourcemapreader",
        "storage",
        "utilities",
    )
    probes = {
        "databases": {
            "postgres": __check_database_pg
        },
        "ingestionPipeline": {
            "redis": __not_supported
        },
        "backendServices": {
            service: check_be_service(service) for service in backend_services
        },
        # Not reported yet: an "overall" section (numberOfEventCaptured,
        # numberOfSessionsCaptured, labels.parent = "information") and an
        # "ssl" flag.
    }
    # Probes run one after another, in section order then declaration order.
    return {
        section: {component: probe() for component, probe in checks.items()}
        for section, checks in probes.items()
    }

View file

@ -0,0 +1,15 @@
from typing import Union
from fastapi import Body, Depends, Request
import schemas
from chalicelib.core import health
from or_dependencies import OR_context
from routers.base import get_routers
public_app, app, app_apikey = get_routers()


@public_app.get('/health', tags=["dashboard"])
def get_global_health():
    """Serve GET /health: the full health report wrapped under "data"."""
    report = health.get_health()
    return {"data": report}

1
ee/api/.gitignore vendored
View file

@ -264,5 +264,6 @@ Pipfile.lock
/app_alerts.py
/build_alerts.sh
/build_crons.sh
/routers/subs/health.py
/routers/subs/v1_api.py
#exp /chalicelib/core/dashboards.py

View file

@ -18,7 +18,7 @@ from routers.crons import core_crons
from routers.crons import core_dynamic_crons
from routers.crons import ee_crons
from routers.subs import insights, metrics, v1_api_ee
from routers.subs import v1_api
from routers.subs import v1_api, health
app = FastAPI(root_path="/api", docs_url=config("docs_url", default=""), redoc_url=config("redoc_url", default=""))
app.add_middleware(GZipMiddleware, minimum_size=1000)
@ -68,6 +68,9 @@ app.include_router(metrics.app)
app.include_router(insights.app)
app.include_router(v1_api.app_apikey)
app.include_router(v1_api_ee.app_apikey)
app.include_router(health.public_app)
app.include_router(health.app)
app.include_router(health.app_apikey)
loglevel = config("LOGLEVEL", default=logging.INFO)
print(f">Loglevel set to: {loglevel}")

View file

@ -0,0 +1,173 @@
import requests
from decouple import config
from chalicelib.utils import pg_client, ch_client
# URL of the metrics endpoint probed for each service, keyed by service name.
# With LOCAL_DEV enabled every service resolves to the same loopback address;
# otherwise each service is addressed by its own in-cluster DNS name.
if config("LOCAL_DEV", cast=bool, default=False):
    _endpoint_template = "http://127.0.0.1:8888/metrics"
else:
    _endpoint_template = "http://{service}-openreplay.app.svc.cluster.local:8888/metrics"

HEALTH_ENDPOINTS = {
    service: _endpoint_template.format(service=service)
    for service in (
        "alerts",
        "assets",
        "assist",
        "chalice",
        "db",
        "ender",
        "frontend",
        "heuristics",
        "http",
        "ingress-nginx",
        "integrations",
        "peers",
        "quickwit",
        "sink",
        "sourcemapreader",
        "storage",
        "utilities",
    )
}
def __check_database_pg():
    """Report PostgreSQL health with the server and schema versions.

    Runs ``SHOW server_version`` and ``SELECT openreplay_version()`` through
    ``pg_client.PostgresClient``.

    Returns:
        dict: ``{"health": True, "details": {"version": ..., "schema": ...}}``
        when both queries succeed; ``{"health": False, "details":
        {"errors": [...]}}`` when connecting or querying raises.
    """
    try:
        with pg_client.PostgresClient() as cur:
            cur.execute("SHOW server_version;")
            server_version = cur.fetchone()
            cur.execute("SELECT openreplay_version() AS version;")
            schema_version = cur.fetchone()
    except Exception as e:
        # A health probe has to describe the outage; letting the exception
        # escape would fail the whole report instead of flagging this entry.
        print("!! Issue getting postgres-health response")
        print(str(e))
        return {
            "health": False,
            "details": {
                "errors": ["postgres health-check failed"]
            }
        }
    return {
        "health": True,
        "details": {
            "version": server_version["server_version"],
            "schema": schema_version["version"]
        }
    }
def __check_database_ch():
    """Report ClickHouse health with the server and schema versions.

    Queries ``version()`` and, when ``system.functions`` lists an
    ``openreplay_version`` function, calls it to obtain the schema version.

    Returns:
        dict: ``{"health": True, "details": {"version": ..., "schema": ...}}``
        when the queries succeed. If the ``openreplay_version`` function is
        missing, ``schema`` is ``"unknown"`` and ``details`` also carries
        ``"errors": ["clickhouse schema is outdated"]``. When connecting or
        querying raises, ``{"health": False, "details": {"errors": [...]}}``.
    """
    errors = {}
    try:
        with ch_client.ClickHouseClient() as ch:
            server_version = ch.execute("SELECT version() AS server_version;")
            schema_version = ch.execute("""SELECT 1
                                           FROM system.functions
                                           WHERE name = 'openreplay_version';""")
            if len(schema_version) > 0:
                # The function takes no arguments, so it is invoked with one
                # empty argument list (the query previously used "()()").
                schema_version = ch.execute("SELECT openreplay_version() AS version;")
                schema_version = schema_version[0]["version"]
            else:
                # NOTE(review): "health" stays True in this case, as before;
                # confirm whether an outdated schema should count as unhealthy.
                schema_version = "unknown"
                errors = {"errors": ["clickhouse schema is outdated"]}
    except Exception as e:
        # Report the outage as an unhealthy entry rather than failing the
        # whole health report.
        print("!! Issue getting clickhouse-health response")
        print(str(e))
        return {
            "health": False,
            "details": {
                "errors": ["clickhouse health-check failed"]
            }
        }
    return {
        "health": True,
        "details": {
            "version": server_version[0]["server_version"],
            "schema": schema_version,
            **errors
        }
    }
def __not_supported():
    """Placeholder probe: always report a single "not supported" error."""
    messages = ["not supported"]
    return {"errors": messages}
def check_be_service(service_name):
    """Build a zero-argument health probe for one backend service.

    Args:
        service_name: key into ``HEALTH_ENDPOINTS`` selecting the URL to probe.

    Returns:
        callable: a function that issues a GET request (2 second timeout) to
        the service endpoint and returns ``{"health": True, "details": {}}``
        on HTTP 200, or ``{"health": False, "details": {"errors": [...]}}``
        on a non-200 status, a timeout or any other request error.
    """

    def fn():
        fail_response = {
            "health": False,
            "details": {
                "errors": ["server health-check failed"]
            }
        }
        # Bound up-front so the generic handler can tell whether a response
        # was received, instead of relying on a NameError being swallowed.
        results = None
        try:
            results = requests.get(HEALTH_ENDPOINTS.get(service_name), timeout=2)
            if results.status_code != 200:
                # Name the probed service; the message used to say "storage"
                # for every service.
                print(f"!! issue with the {service_name}-health code:{results.status_code}")
                print(results.text)
                fail_response["details"]["errors"].append(results.text)
                return fail_response
        except requests.exceptions.Timeout:
            print(f"!! Timeout getting {service_name}-health")
            fail_response["details"]["errors"].append("timeout")
            return fail_response
        except Exception as e:
            print(f"!! Issue getting {service_name}-health response")
            print(str(e))
            if results is None:
                # The request itself failed (connection error, bad URL, ...).
                print("couldn't get response")
                fail_response["details"]["errors"].append(str(e))
            else:
                print("expected JSON, received:")
                print(results.text)
                fail_response["details"]["errors"].append(results.text)
            return fail_response
        return {
            "health": True,
            "details": {}
        }

    return fn
def get_health():
    """Run every health probe and return the results grouped by section.

    Returns:
        dict: ``{"databases": {...}, "ingestionPipeline": {...},
        "backendServices": {...}}`` where each inner value is the result
        returned by that component's probe.
    """
    backend_services = (
        "alerts",
        "assets",
        "assist",
        "chalice",
        "db",
        "ender",
        "frontend",
        "heuristics",
        "http",
        "ingress-nginx",
        "integrations",
        "peers",
        "quickwit",
        "sink",
        "sourcemapreader",
        "storage",
        "utilities",
    )
    probes = {
        "databases": {
            "postgres": __check_database_pg,
            "clickhouse": __check_database_ch
        },
        "ingestionPipeline": {
            "redis": __not_supported,
            "kafka": __not_supported
        },
        "backendServices": {
            service: check_be_service(service) for service in backend_services
        },
        # Not reported yet: an "overall" section (numberOfEventCaptured,
        # numberOfSessionsCaptured, labels.parent = "information") and an
        # "ssl" flag.
    }
    # Probes run one after another, in section order then declaration order.
    return {
        section: {component: probe() for component, probe in checks.items()}
        for section, checks in probes.items()
    }

View file

@ -78,6 +78,7 @@ rm -rf ./Dockerfile_bundle
rm -rf ./entrypoint.bundle.sh
rm -rf ./chalicelib/core/heatmaps.py
rm -rf ./schemas.py
rm -rf ./routers/subs/health.py
rm -rf ./routers/subs/v1_api.py
#exp rm -rf ./chalicelib/core/custom_metrics.py
rm -rf ./chalicelib/core/performance_event.py

View file

@ -0,0 +1 @@
-- Defines (or redefines) a zero-argument function that returns the schema version string.
CREATE OR REPLACE FUNCTION openreplay_version AS() -> 'v1.11.0-ee';

View file

@ -1,3 +1,4 @@
-- Defines (or redefines) a zero-argument function that returns the schema version string.
CREATE OR REPLACE FUNCTION openreplay_version AS() -> 'v1.11.0-ee';
CREATE DATABASE IF NOT EXISTS experimental;
CREATE TABLE IF NOT EXISTS experimental.autocomplete