From 06d568a96e7d4502c6b381c557a1017b65c813cf Mon Sep 17 00:00:00 2001 From: rjshrjndrn Date: Thu, 30 Jun 2022 12:54:11 +0200 Subject: [PATCH] fix(helm): value override Signed-off-by: rjshrjndrn --- .../manifests/observability-values.yaml | 537 +++++++++--------- 1 file changed, 268 insertions(+), 269 deletions(-) diff --git a/ee/scripts/helmcharts/manifests/observability-values.yaml b/ee/scripts/helmcharts/manifests/observability-values.yaml index f03a4f65c..fa49ab010 100644 --- a/ee/scripts/helmcharts/manifests/observability-values.yaml +++ b/ee/scripts/helmcharts/manifests/observability-values.yaml @@ -16,285 +16,284 @@ slackChannel: &slackchannel "changeMeAlertsChannel" ## Custom configuration for Monitoring and logging stack ######################################################## -observability: - kube-prometheus-stack: - fullnameOverride: "openreplay" - grafana: - adminPassword: *adminpass - env: - GF_SERVER_ROOT_URL: http://grafana.local.com/grafana - additionalDataSources: - - name: loki - editable: true - type: loki - url: http://loki.observability:3100 - plugins: - - grafana-piechart-panel - - vertamedia-clickhouse-datasource - - digrich-bubblechart-panel - - grafana-clock-panel - ingress: - enabled: true - ingressClassName: openreplay - hosts: +kube-prometheus-stack: + fullnameOverride: "openreplay" + grafana: + adminPassword: *adminpass + env: + GF_SERVER_ROOT_URL: http://grafana.local.com/grafana + additionalDataSources: + - name: loki + editable: true + type: loki + url: http://loki.observability:3100 + plugins: + - grafana-piechart-panel + - vertamedia-clickhouse-datasource + - digrich-bubblechart-panel + - grafana-clock-panel + ingress: + enabled: true + ingressClassName: openreplay + hosts: + - *domainName + annotations: + nginx.ingress.kubernetes.io/rewrite-target: /$1 + path: /grafana/(.*) + tls: + - hosts: - *domainName - annotations: - nginx.ingress.kubernetes.io/rewrite-target: /$1 - path: /grafana/(.*) - tls: - - hosts: - - *domainName - secretName: openreplay-ssl + secretName: openreplay-ssl - prometheus: - prometheusSpec: - storageSpec: - volumeClaimTemplate: - spec: - accessModes: ["ReadWriteOnce"] - resources: - requests: - storage: 200Gi - alertmanager: - config: - global: - resolve_timeout: 5m - slack_api_url: *slackwebhook - route: - # group_by: ['job'] - group_by: ['alertname','container'] - group_wait: 30s - group_interval: 5m - repeat_interval: 12h - receiver: 'slack' - routes: - - match: - alertname: Watchdog - receiver: 'slack' - receivers: - - name: slack - slack_configs: - - channel: *slackchannel - color: '{{ template "slack.color" . }}' - title: '{{ template "slack.title" . }}' - text: '{{ template "slack.text" . }}' - send_resolved: true - actions: - - type: button - text: 'Runbook :green_book:' - url: '{{ (index .Alerts 0).Annotations.runbook_url }}' - # - type: button - # text: 'Query :mag:' - # url: '{{ (index .Alerts 0).GeneratorURL }}' - # - type: button - # text: 'Dashboard :chart_with_upwards_trend:' - # url: '{{ (index .Alerts 0).Annotations.dashboard_url }}' - # - type: button - # text: 'Silence :no_bell:' - # url: '{{ template "__alert_silence_link" . 
}}' - templates: - - /etc/alertmanager/config/*.tmpl - - templateFiles: - template_1.tmpl: |- - {{/* Alertmanager Silence link */}} - {{ define "__alert_silence_link" -}} - {{ .ExternalURL }}/#/silences/new?filter=%7B - {{- range .CommonLabels.SortedPairs -}} - {{- if ne .Name "alertname" -}} - {{- .Name }}%3D"{{- .Value -}}"%2C%20 - {{- end -}} - {{- end -}} - alertname%3D"{{- .CommonLabels.alertname -}}"%7D - {{- end }} - - {{/* Severity of the alert */}} - {{ define "__alert_severity" -}} - {{- if eq .CommonLabels.severity "critical" -}} - *Severity:* `Critical` - {{- else if eq .CommonLabels.severity "warning" -}} - *Severity:* `Warning` - {{- else if eq .CommonLabels.severity "info" -}} - *Severity:* `Info` - {{- else -}} - *Severity:* :question: {{ .CommonLabels.severity }} - {{- end }} - {{- end }} - - {{/* Title of the Slack alert */}} - {{ define "slack.title" -}} - [{{ .Status | toUpper -}} - {{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{- end -}} - ] {{ .CommonLabels.alertname }} - {{- end }} - - - {{/* Color of Slack attachment (appears as line next to alert )*/}} - {{ define "slack.color" -}} - {{ if eq .Status "firing" -}} - {{ if eq .CommonLabels.severity "warning" -}} - warning - {{- else if eq .CommonLabels.severity "critical" -}} - danger - {{- else -}} - #439FE0 - {{- end -}} - {{ else -}} - good - {{- end }} - {{- end }} - - {{/* The text to display in the alert */}} - {{ define "slack.text" -}} - - {{ template "__alert_severity" . }} - {{- if (index .Alerts 0).Annotations.summary }} - {{- "\n" -}} - *Summary:* {{ (index .Alerts 0).Annotations.summary }} - {{- end }} - - {{ range .Alerts }} - - {{- if .Annotations.description }} - {{- "\n" -}} - {{ .Annotations.description }} - {{- "\n" -}} - {{- end }} - {{- if .Annotations.message }} - {{- "\n" -}} - {{ .Annotations.message }} - {{- "\n" -}} - {{- end }} - - {{- end }} - - {{- end }} - loki: + prometheus: + prometheusSpec: + storageSpec: + volumeClaimTemplate: + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 200Gi + alertmanager: config: - # existingSecret: - auth_enabled: false - ingester: - chunk_idle_period: 3m - chunk_block_size: 262144 - chunk_retain_period: 1m - max_transfer_retries: 0 - wal: - dir: /data/loki/wal - lifecycler: - ring: - kvstore: - store: inmemory - replication_factor: 1 + global: + resolve_timeout: 5m + slack_api_url: *slackwebhook + route: + # group_by: ['job'] + group_by: ['alertname','container'] + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + receiver: 'slack' + routes: + - match: + alertname: Watchdog + receiver: 'slack' + receivers: + - name: slack + slack_configs: + - channel: *slackchannel + color: '{{ template "slack.color" . }}' + title: '{{ template "slack.title" . }}' + text: '{{ template "slack.text" . }}' + send_resolved: true + actions: + - type: button + text: 'Runbook :green_book:' + url: '{{ (index .Alerts 0).Annotations.runbook_url }}' + # - type: button + # text: 'Query :mag:' + # url: '{{ (index .Alerts 0).GeneratorURL }}' + # - type: button + # text: 'Dashboard :chart_with_upwards_trend:' + # url: '{{ (index .Alerts 0).Annotations.dashboard_url }}' + # - type: button + # text: 'Silence :no_bell:' + # url: '{{ template "__alert_silence_link" . 
}}' + templates: + - /etc/alertmanager/config/*.tmpl - limits_config: - enforce_metric_name: false - reject_old_samples: true - reject_old_samples_max_age: 168h - schema_config: - configs: - - from: 2020-10-24 - store: boltdb-shipper - object_store: filesystem - schema: v11 - index: - prefix: index_ - period: 24h - server: - http_listen_port: 3100 - storage_config: - boltdb_shipper: - active_index_directory: /data/loki/boltdb-shipper-active - cache_location: /data/loki/boltdb-shipper-cache - cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space - shared_store: filesystem - filesystem: - directory: /data/loki/chunks - chunk_store_config: - max_look_back_period: 0s - table_manager: - retention_deletes_enabled: false - retention_period: 0s - compactor: - working_directory: /data/loki/boltdb-shipper-compactor - shared_store: filesystem - retention_enabled: true - # Needed for Alerting: https://grafana.com/docs/loki/latest/rules/ - # This is just a simple example, for more details: https://grafana.com/docs/loki/latest/configuration/#ruler_config - ruler: - storage: - type: local - local: - directory: /rules - rule_path: /tmp/scratch - alertmanager_url: http://openreplay-alertmanager.observability.svc.cluster.local:9093 + templateFiles: + template_1.tmpl: |- + {{/* Alertmanager Silence link */}} + {{ define "__alert_silence_link" -}} + {{ .ExternalURL }}/#/silences/new?filter=%7B + {{- range .CommonLabels.SortedPairs -}} + {{- if ne .Name "alertname" -}} + {{- .Name }}%3D"{{- .Value -}}"%2C%20 + {{- end -}} + {{- end -}} + alertname%3D"{{- .CommonLabels.alertname -}}"%7D + {{- end }} + + {{/* Severity of the alert */}} + {{ define "__alert_severity" -}} + {{- if eq .CommonLabels.severity "critical" -}} + *Severity:* `Critical` + {{- else if eq .CommonLabels.severity "warning" -}} + *Severity:* `Warning` + {{- else if eq .CommonLabels.severity "info" -}} + *Severity:* `Info` + {{- else -}} + *Severity:* :question: {{ .CommonLabels.severity }} + {{- end }} + {{- end }} + + {{/* Title of the Slack alert */}} + {{ define "slack.title" -}} + [{{ .Status | toUpper -}} + {{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{- end -}} + ] {{ .CommonLabels.alertname }} + {{- end }} + + + {{/* Color of Slack attachment (appears as line next to alert )*/}} + {{ define "slack.color" -}} + {{ if eq .Status "firing" -}} + {{ if eq .CommonLabels.severity "warning" -}} + warning + {{- else if eq .CommonLabels.severity "critical" -}} + danger + {{- else -}} + #439FE0 + {{- end -}} + {{ else -}} + good + {{- end }} + {{- end }} + + {{/* The text to display in the alert */}} + {{ define "slack.text" -}} + + {{ template "__alert_severity" . 
}} + {{- if (index .Alerts 0).Annotations.summary }} + {{- "\n" -}} + *Summary:* {{ (index .Alerts 0).Annotations.summary }} + {{- end }} + + {{ range .Alerts }} + + {{- if .Annotations.description }} + {{- "\n" -}} + {{ .Annotations.description }} + {{- "\n" -}} + {{- end }} + {{- if .Annotations.message }} + {{- "\n" -}} + {{ .Annotations.message }} + {{- "\n" -}} + {{- end }} + + {{- end }} + + {{- end }} +loki: + config: + # existingSecret: + auth_enabled: false + ingester: + chunk_idle_period: 3m + chunk_block_size: 262144 + chunk_retain_period: 1m + max_transfer_retries: 0 + wal: + dir: /data/loki/wal + lifecycler: ring: kvstore: store: inmemory - enable_api: true + replication_factor: 1 - persistence: - enabled: true - accessModes: - - ReadWriteOnce - size: 100Gi + limits_config: + enforce_metric_name: false + reject_old_samples: true + reject_old_samples_max_age: 168h + schema_config: + configs: + - from: 2020-10-24 + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h + server: + http_listen_port: 3100 + storage_config: + boltdb_shipper: + active_index_directory: /data/loki/boltdb-shipper-active + cache_location: /data/loki/boltdb-shipper-cache + cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space + shared_store: filesystem + filesystem: + directory: /data/loki/chunks + chunk_store_config: + max_look_back_period: 0s + table_manager: + retention_deletes_enabled: false + retention_period: 0s + compactor: + working_directory: /data/loki/boltdb-shipper-compactor + shared_store: filesystem + retention_enabled: true + # Needed for Alerting: https://grafana.com/docs/loki/latest/rules/ + # This is just a simple example, for more details: https://grafana.com/docs/loki/latest/configuration/#ruler_config + ruler: + storage: + type: local + local: + directory: /rules + rule_path: /tmp/scratch + alertmanager_url: http://openreplay-alertmanager.observability.svc.cluster.local:9093 + ring: + kvstore: + store: inmemory + enable_api: true - serviceMonitor: + persistence: + enabled: true + accessModes: + - ReadWriteOnce + size: 100Gi + + serviceMonitor: + enabled: true + interval: "" + additionalLabels: + release: monitoring + annotations: {} + # scrapeTimeout: 10s + # path: /metrics + prometheusRule: enabled: true - interval: "" additionalLabels: release: monitoring - annotations: {} - # scrapeTimeout: 10s - # path: /metrics - prometheusRule: - enabled: true - additionalLabels: - release: monitoring - rules: - - alert: LokiProcessTooManyRestarts - expr: changes(process_start_time_seconds{job=~"loki"}[15m]) > 2 - for: 0m - labels: - severity: warning - annotations: - summary: Loki process too many restarts (instance {{ $labels.instance }}) - description: "A loki process had too many restarts (target {{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - - alert: LokiRequestErrors - expr: 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route) > 10 - for: 15m - labels: - severity: critical - annotations: - summary: Loki request errors (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} and {{ $labels.route }} are experiencing errors\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - - alert: LokiRequestPanic - expr: sum(increase(loki_panic_total[10m])) by (namespace, job) > 0 - for: 5m - labels: - severity: critical - annotations: - 
summary: Loki request panic (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} is experiencing {{ printf \"%.2f\" $value }}% increase of panics\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - - alert: LokiRequestLatency - expr: (histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{route!~"(?i).*tail.*"}[5m])) by (le))) > 1 - for: 5m - labels: - severity: critical - annotations: - summary: Loki request latency (instance {{ $labels.instance }}) - description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + rules: + - alert: LokiProcessTooManyRestarts + expr: changes(process_start_time_seconds{job=~"loki"}[15m]) > 2 + for: 0m + labels: + severity: warning + annotations: + summary: Loki process too many restarts (instance {{ $labels.instance }}) + description: "A loki process had too many restarts (target {{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: LokiRequestErrors + expr: 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route) > 10 + for: 15m + labels: + severity: critical + annotations: + summary: Loki request errors (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} and {{ $labels.route }} are experiencing errors\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: LokiRequestPanic + expr: sum(increase(loki_panic_total[10m])) by (namespace, job) > 0 + for: 5m + labels: + severity: critical + annotations: + summary: Loki request panic (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is experiencing {{ printf \"%.2f\" $value }}% increase of panics\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: LokiRequestLatency + expr: (histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{route!~"(?i).*tail.*"}[5m])) by (le))) > 1 + for: 5m + labels: + severity: critical + annotations: + summary: Loki request latency (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - # Specify Loki Alerting rules based on this documentation: https://grafana.com/docs/loki/latest/rules/ - # When specified, you also need to add a ruler config section above. An example is shown in the alerting docs. - alerting_groups: - - name: dbZombie - rules: - - alert: dbZombie - expr: absent_over_time({namespace="app", app="db"} |~ "Queue Statistics"[5m]) == 1 - for: 10m - labels: - severity: warning - promtail: - config: - clients: - - url: http://loki.observability.svc.cluster.local:3100/loki/api/v1/push + # Specify Loki Alerting rules based on this documentation: https://grafana.com/docs/loki/latest/rules/ + # When specified, you also need to add a ruler config section above. An example is shown in the alerting docs. + alerting_groups: + - name: dbZombie + rules: + - alert: dbZombie + expr: absent_over_time({namespace="app", app="db"} |~ "Queue Statistics"[5m]) == 1 + for: 10m + labels: + severity: warning +promtail: + config: + clients: + - url: http://loki.observability.svc.cluster.local:3100/loki/api/v1/push
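In short, the patch drops the top-level observability: key from observability-values.yaml and un-indents everything beneath it by one level, so kube-prometheus-stack, loki and promtail become root keys of the values file; the individual Grafana, Alertmanager, Loki and Promtail settings are carried over unchanged (268 insertions against 269 deletions, the one-line difference being the removed observability: line itself). A minimal sketch of the shape change, with placeholder values standing in for the full configuration shown in the hunk above:

# Before: sub-chart overrides wrapped in an extra observability: level
observability:
  kube-prometheus-stack:
    grafana:
      adminPassword: "<placeholder>"
  loki:
    config: {}        # settings omitted in this sketch
  promtail:
    config: {}

# After: the same keys sit at the root of the values file
kube-prometheus-stack:
  grafana:
    adminPassword: "<placeholder>"
loki:
  config: {}
promtail:
  config: {}

Helm passes values found under a root key of the parent chart's values onto the sub-chart of the same name, so once this file is supplied with -f/--values to the chart that declares kube-prometheus-stack, loki and promtail as dependencies (presumably the observability umbrella chart these manifests belong to), the overrides land on those sub-charts directly; the extra observability: wrapper is presumably what kept them from taking effect before, hence the commit subject "fix(helm): value override".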