# kubernetes_monitoring/values.yaml
---
# All values within this Helm chart's values.yaml are nested under the `nfc_monitoring` key.
# This makes it possible to include this chart as a dependency in another chart
# without variable collisions.
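#
# A minimal sketch of consuming it as a dependency (the chart version and
# repository URL below are assumptions – substitute your own):
#
#   # Chart.yaml of the parent chart
#   dependencies:
#     - name: kubernetes_monitoring
#       version: "1.0.0"
#       repository: "https://example.com/helm-charts"
#
#   # values.yaml of the parent chart
#   nfc_monitoring:
#     grafana:
#       ingress:
#         hostname: grafana.example.com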
nfc_monitoring:

  kubernetes:
    cluster_dns_name: cluster.local
    networking: calico

  alert_manager:
    enabled: true
    image:
      name: quay.io/prometheus/alertmanager
      tag: 'v0.26.0'
    # How many replicas to deploy
    replicas: 1
    ingress:
      annotations:
        cert-manager.io/cluster-issuer: "selfsigned-issuer"
        nginx.ingress.kubernetes.io/ssl-redirect: "true"
      enabled: false
      hostname: alert-manager.local
    labels:
      app.kubernetes.io/component: alert-router
      app.kubernetes.io/name: alertmanager
    namespace: alerting

  grafana:
    dashboards:
      cert_manager: false
      enabled: false
    # Grafana Configuration
    # Type: Dict
    # See: https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana
    config:
      analytics:
        enabled: 'false'
      # database:
      #   type: mysql
      #   host: mariadb-galera.mariadb.svc:3306
      #   name: grafana
      #   user: root
      #   password: admin
      log:
        mode: "console"
      auth:
        disable_login_form: "false"
      security:
        admin_user: admin
        admin_password: admin
    image:
      name: grafana/grafana
      tag: '10.3.1'  # '10.0.5'
    ingress:
      annotations:
        cert-manager.io/cluster-issuer: "selfsigned-issuer"
        nginx.ingress.kubernetes.io/ssl-redirect: "true"
      enabled: true
      hostname: grafana.local
    labels:
      app.kubernetes.io/component: graphing
      app.kubernetes.io/name: grafana
    namespace: grafana
    replicas: 1
    # storage_accessModes: ReadWriteMany
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/worker
                  operator: Exists
            weight: 100
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/storage
                  operator: DoesNotExist
            weight: 100
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                      - prometheus
              topologyKey: kubernetes.io/hostname
            weight: 10
    # Grafana datasources to add
    # Type: list
    # See: https://grafana.com/docs/grafana/latest/administration/provisioning/#data-sources
    DataSources:
      - name: alertmanager
        type: alertmanager
        access: proxy
        url: "http://alertmanager-main.{{ .Values.nfc_monitoring.alert_manager.namespace }}.svc:9093"
        isDefault: false
        jsonData:
          tlsSkipVerify: true
          timeInterval: "5s"
          implementation: prometheus
          handleGrafanaManagedAlerts: false
          orgId: 1
        editable: true
      - name: loki
        type: loki
        access: proxy
        url: "http://{{ .Values.nfc_monitoring.loki.service_name }}.{{ .Values.nfc_monitoring.loki.namespace }}.svc.{{ .Values.nfc_monitoring.kubernetes.cluster_dns_name }}:{{ .Values.nfc_monitoring.loki.service_port }}"
        isDefault: false
        jsonData:
          orgId: 1
        editable: true
      # - name: mimir
      #   type: prometheus
      #   access: proxy
      #   url: "http://mimir-gateway.metrics.svc.cluster.local/prometheus"
      #   isDefault: false
      #   jsonData:
      #     manageAlerts: true
      #     orgId: 1
      #     prometheusType: Mimir
      #   editable: true
      # - name: prometheus
      #   type: prometheus
      #   access: proxy
      #   url: "http://prometheus-k8s.{{ .Values.nfc_monitoring.prometheus.namespace }}.svc:9090"
      #   isDefault: true
      #   jsonData:
      #     manageAlerts: true
      #     orgId: 1
      #     prometheusType: Prometheus
      #     prometheusVersion: 2.42.0
      #   editable: true
      - name: thanos
        type: prometheus
        access: proxy
        url: "http://thanos-query.metrics.svc:9090"
        isDefault: true
        jsonData:
          manageAlerts: true
          orgId: 1
          prometheusType: Thanos
          prometheusVersion: 0.31.0
        editable: true

  grafana_agent:
    enabled: true
    image:
      name: grafana/agent
      tag: 'v0.39.2'
    labels:
      app.kubernetes.io/component: exporter
      app.kubernetes.io/name: grafana-agent
    namespace: monitoring

  loki:
    enabled: true
    image:
      name: grafana/loki
      tag: 2.7.4
    namespace: logging
    # If no config is set up, logging will not be enabled.
    config: {}
    # service_name and service_port are used for the connection to your Loki
    # instance (they are referenced by the Grafana loki datasource URL above):
    # service_name: loki-gateway
    # service_port: 80
    ServiceMonitor:
      selector:
        matchLabels:
          app.kubernetes.io/name: loki
          app.kubernetes.io/component: logging

  kube_monitor_proxy:
    enabled: false
    namespace: monitoring

  kube_rbac_proxy:
    # This image is used as part of kube-monitor-proxy.
    image:
      name: quay.io/brancz/kube-rbac-proxy
      tag: 'v0.14.2'

  kube_state_metrics:
    enabled: false
    image:
      name: registry.k8s.io/kube-state-metrics/kube-state-metrics
      tag: 'v2.8.1'
    namespace: monitoring

  prometheus:
    image:
      name: prom/prometheus
      tag: 'v2.49.0'
    # How many replicas to deploy
    replicas: 1
    # alertmanagers:
    #   - name:
    # Configure Prometheus to write metrics to a remote host.
    # The example config below uses a secret named "prometheus-remote-write"
    # with two keys: username and password.
    # Documentation: https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.RemoteWriteSpec
    remotewrite: {}
    #   url:
    #   name:
    #   remoteTimeout: 30
    #   writeRelabelConfigs:
    #   basicAuth:
    #     username:
    #       name: prometheus-remote-write
    #       key: username
    #     password:
    #       name: prometheus-remote-write
    #       key: password
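    # A sketch of that secret (assumption – the namespace should match
    # .prometheus.namespace, which defaults to "monitoring"):
    #   apiVersion: v1
    #   kind: Secret
    #   metadata:
    #     name: prometheus-remote-write
    #     namespace: monitoring
    #   stringData:
    #     username: <remote-write-username>
    #     password: <remote-write-password>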
    ingress:
      annotations:
        cert-manager.io/cluster-issuer: "selfsigned-issuer"
        nginx.ingress.kubernetes.io/ssl-redirect: "true"
      enabled: true
      hostname: prometheus.local
    # These labels are appended to all Prometheus items and are also the selector labels
    labels:
      app.kubernetes.io/component: prometheus
      app.kubernetes.io/name: prometheus
    namespace: monitoring
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/worker
                  operator: Exists
            weight: 100
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/storage
                  operator: DoesNotExist
            weight: 100
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                      - prometheus
              topologyKey: kubernetes.io/hostname
            weight: 10
    # Deploy a Kyverno generate policy that creates a Role and RoleBinding
    # for the Prometheus service account so that it can monitor
    # new and existing namespaces.
    kyverno_role_policy: false
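    # A rough sketch of the shape such a generate policy takes (assumption –
    # the names and rules here are illustrative, not the chart's actual policy):
    #   apiVersion: kyverno.io/v1
    #   kind: ClusterPolicy
    #   metadata:
    #     name: prometheus-namespace-rbac
    #   spec:
    #     rules:
    #       - name: generate-prometheus-role
    #         match:
    #           any:
    #             - resources:
    #                 kinds:
    #                   - Namespace
    #         generate:
    #           apiVersion: rbac.authorization.k8s.io/v1
    #           kind: Role
    #           name: prometheus-k8s
    #           namespace: "{{request.object.metadata.name}}"
    #           synchronize: true
    #           data:
    #             rules:
    #               - apiGroups: [""]
    #                 resources: ["services", "endpoints", "pods"]
    #                 verbs: ["get", "list", "watch"]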
    storage:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 40Gi
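          # Any other field from the standard PersistentVolumeClaim spec may be
          # added here, e.g. (the class name is an assumption – use one from
          # your cluster):
          # storageClassName: ceph-block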
    # Additional settings for Prometheus.
    # See: https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.PrometheusSpec
    # Type: dict
    additional:
      # Don't declare remoteWrite here, as that's done at path .prometheus.remotewrite
      # remoteWrite:
      retention: 24h
      retentionSize: 2GB
      ruleSelector:
        matchLabels:
          role: alert-rules
    service_monitor:
      apiserver: false
      cadvisor: false
      calico: false
      ceph: false
      coredns: false
      kube_controller_manager: false
      kubelet: false
      kube_scheduler: false

  prometheus_adaptor:
    enabled: false
    image:
      name: registry.k8s.io/prometheus-adapter/prometheus-adapter
      tag: 'v0.11.1'
    labels:
      app.kubernetes.io/component: metrics-adapter
      app.kubernetes.io/name: prometheus-adapter
    namespace: monitoring
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/worker
                  operator: Exists
            weight: 100
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/storage
                  operator: DoesNotExist
            weight: 100
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                      - prometheus
              topologyKey: kubernetes.io/hostname
            weight: 10

  thanos:
    image:
      name: thanosio/thanos
      tag: v0.32.3
    # Prometheus thanos sidecar
    # See: https://thanos.io/tip/components/sidecar.md/
    # Type: Dict
    sidecar:
      enabled: true
      # Config must be specified for the sidecar to deploy
      config: {}
      #   type: S3
      #   config:
      #     bucket: "thanos-metrics"
      #     endpoint: "rook-ceph-rgw-earth.ceph.svc:80"
      #     access_key: "7J5NM2MNCDB4T4Y9OKJ5"
      #     secret_key: "t9r69RzZdWEBL3NCKiUIpDk6j5625xc6HucusiGG"
      #     insecure: true
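      # The commented example above follows Thanos' object storage client
      # configuration format; see: https://thanos.io/tip/thanos/storage.md/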

  additions:

    ceph:
      enabled: false
      namespace: ceph
      PrometheusRules: true
      ServiceMonitor:
        selector:
          matchLabels:
            app: rook-ceph-mgr

    # Add sidecar to grafana pod to load dashboards from configMap
    dashboard_sidecar:
      enabled: false
      image:
        name: ghcr.io/kiwigrid/k8s-sidecar
        tag: '1.24.5'
      label_name: grafana_dashboard
      label_value: "1"
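      # A sketch of a dashboard ConfigMap the sidecar would load (assumption –
      # name and namespace are illustrative; the label must match
      # label_name/label_value above):
      #   apiVersion: v1
      #   kind: ConfigMap
      #   metadata:
      #     name: my-dashboard
      #     namespace: grafana
      #     labels:
      #       grafana_dashboard: "1"
      #   data:
      #     my-dashboard.json: |
      #       { ...dashboard JSON... }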
    network_policy:
      enabled: false

    loki_instance:
      image:
        name: grafana/loki
        tag: 2.7.4
        # tag: 2.9.0
      namespace: loki

    oncall_instance:
      image:
        name: grafana/oncall
        tag: v1.1.40
      # oncall:
      #   # image:
      #   #   # Grafana OnCall docker image repository
      #   #   repository: grafana/oncall
      #   #   tag: v1.1.38
      #   #   pullPolicy: Always
      #   service:
      #     enabled: false
      #     type: LoadBalancer
      #     port: 8080
      #     annotations: {}
      #   engine:
      #     replicaCount: 1
      #     resources:
      #       limits:
      #         cpu: 100m
      #         memory: 128Mi
      #       requests:
      #         cpu: 100m
      #         memory: 128Mi
      #   celery:
      #     replicaCount: 1
      #     resources:
      #       limits:
      #         cpu: 100m
      #         memory: 128Mi
      #       requests:
      #         cpu: 100m
      #         memory: 128Mi
      #   database:
      #     type: none