# kubernetes_monitoring/values.yaml
---
# All values within this helm chart's values.yaml file are nested under the
# top-level key `nfc_monitoring`. This provides the opportunity to include this
# helm chart as a dependency without variable collision; a sketch of such a
# dependency declaration follows.
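#
# A minimal sketch of consuming this chart as a dependency from a parent
# chart's Chart.yaml (the version and repository URL are illustrative
# assumptions, not pinned values):
#
#   dependencies:
#     - name: kubernetes_monitoring
#       version: 0.1.0                                # assumed version
#       repository: https://example.com/helm-charts   # assumed repository URL
#
# The parent chart can then set overrides under its own `nfc_monitoring:` key
# without colliding with values from other charts.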
nfc_monitoring:

  kubernetes:
    cluster_dns_name: cluster.local
    networking: calico

  alert_manager:

    image:
      name: quay.io/prometheus/alertmanager
      tag: 'v0.26.0'

    ingress:
      annotations:
        cert-manager.io/cluster-issuer: "selfsigned-issuer"
        nginx.ingress.kubernetes.io/ssl-redirect: "true"
      # enabled: false # Optional, boolean.
      spec:
        tls:
          - hosts:
              - alert-manager.local
            secretName: certificate-tls-alert-manager
        rules:
          - host: alert-manager.local
            http:
              paths:
                - path: /
                  pathType: Prefix
                  backend:
                    service:
                      name: alertmanager-main
                      port:
                        name: web

    labels:
      app.kubernetes.io/instance: main
      app.kubernetes.io/component: alert-router
      app.kubernetes.io/name: alertmanager

    namespace: alerting
  grafana:

    # Grafana Configuration
    # Type: Dict
    # See: https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana
    config:
      analytics:
        enabled: 'false'
      # database:
      #   type: mysql
      #   host: mariadb-galera.mariadb.svc:3306
      #   name: grafana
      #   user: root
      #   password: admin
      log:
        mode: "console"
      auth:
        disable_login_form: "false"
      security:
        admin_user: admin
        admin_password: admin
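    #
    # For reference, the `config` dict above is expected to land in Grafana's
    # grafana.ini, assuming a one-to-one mapping of dict keys to INI sections
    # (a sketch of the intent, not this chart's rendered output):
    #
    #   [analytics]
    #   enabled = false
    #
    #   [log]
    #   mode = console
    #
    #   [auth]
    #   disable_login_form = false
    #
    #   [security]
    #   admin_user = admin
    #   admin_password = admin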
    image:
      name: grafana/grafana
      tag: '10.1.2' # '10.0.5'

    ingress:
      annotations:
        cert-manager.io/cluster-issuer: "selfsigned-issuer"
        nginx.ingress.kubernetes.io/ssl-redirect: "true"
      # enabled: false # Optional, boolean.
      spec:
        tls:
          - hosts:
              - grafana.local
            secretName: certificate-tls-grafana
        rules:
          - host: grafana.local
            http:
              paths:
                - path: /
                  pathType: Prefix
                  backend:
                    service:
                      name: grafana
                      port:
                        name: grafana-http

    labels:
      app.kubernetes.io/component: graphing
      app.kubernetes.io/instance: k8s
      app.kubernetes.io/name: grafana

    namespace: grafana

    replicas: 1
    # storage_accessModes: ReadWriteMany
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/worker
                  operator: Exists
            weight: 100
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/storage
                  operator: DoesNotExist
            weight: 100
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                      - prometheus
              topologyKey: kubernetes.io/hostname
            weight: 10
    # To add Grafana datasources
    # Type: list
    # See: https://grafana.com/docs/grafana/latest/administration/provisioning/#data-sources
    DataSources:

      - name: alertmanager
        type: alertmanager
        access: proxy
        url: "http://alertmanager-main.{{ .Values.nfc_monitoring.alert_manager.namespace }}.svc:9093"
        isDefault: false
        jsonData:
          tlsSkipVerify: true
          timeInterval: "5s"
          implementation: prometheus
          handleGrafanaManagedAlerts: false
          orgId: 1
        editable: true

      - name: loki
        type: loki
        access: proxy
        url: "http://{{ .Values.nfc_monitoring.loki.service_name }}.{{ .Values.nfc_monitoring.loki.namespace }}.svc.{{ .Values.nfc_monitoring.kubernetes.cluster_dns_name }}:{{ .Values.nfc_monitoring.loki.service_port }}"
        isDefault: false
        jsonData:
          orgId: 1
        editable: true

      # - name: mimir
      #   type: prometheus
      #   access: proxy
      #   url: "http://mimir-gateway.metrics.svc.cluster.local/prometheus"
      #   isDefault: false
      #   jsonData:
      #     manageAlerts: true
      #     orgId: 1
      #     prometheusType: Mimir
      #   editable: true

      # - name: prometheus
      #   type: prometheus
      #   access: proxy
      #   url: "http://prometheus-k8s.{{ .Values.nfc_monitoring.prometheus.namespace }}.svc:9090"
      #   isDefault: true
      #   jsonData:
      #     manageAlerts: true
      #     orgId: 1
      #     prometheusType: Prometheus
      #     prometheusVersion: 2.42.0
      #   editable: true

      - name: thanos
        type: prometheus
        access: proxy
        url: "http://thanos-query.metrics.svc:9090"
        isDefault: true
        jsonData:
          manageAlerts: true
          orgId: 1
          prometheusType: Thanos
          prometheusVersion: 0.31.0
        editable: true
  grafana_agent:
    image:
      name: grafana/agent
      tag: 'v0.36.1'
    labels:
      app.kubernetes.io/instance: k8s
      app.kubernetes.io/component: exporter
      app.kubernetes.io/name: grafana-agent
    namespace: monitoring

  loki:
    enabled: true
    image:
      name: grafana/loki
      tag: 2.7.4
    namespace: logging
    # The service name and port used for the connection to your Loki instance.
    service_name: loki-gateway
    service_port: 80
    ServiceMonitor:
      selector:
        matchLabels:
          app.kubernetes.io/name: loki
          app.kubernetes.io/component: logging

  kube_monitor_proxy:
    namespace: monitoring

  kube_rbac_proxy:
    # This image is used as part of kube-monitor-proxy.
    image:
      name: quay.io/brancz/kube-rbac-proxy
      tag: 'v0.14.2'

  kube_state_metrics:
    image:
      name: registry.k8s.io/kube-state-metrics/kube-state-metrics
      tag: 'v2.8.1'
    namespace: monitoring
  prometheus:

    image:
      name: prom/prometheus
      tag: 'v2.47.0'

    ingress:
      annotations:
        cert-manager.io/cluster-issuer: "selfsigned-issuer"
        nginx.ingress.kubernetes.io/ssl-redirect: "true"
      # enabled: false # Optional, boolean.
      spec:
        tls:
          - hosts:
              - prometheus.local
            secretName: certificate-tls-prometheus
        rules:
          - host: prometheus.local
            http:
              paths:
                - path: /
                  pathType: Prefix
                  backend:
                    service:
                      name: prometheus-k8s
                      port:
                        name: web

    # These labels are appended to all Prometheus items and are also the selector labels.
    labels:
      app.kubernetes.io/component: prometheus
      app.kubernetes.io/instance: k8s
      app.kubernetes.io/name: prometheus

    namespace: monitoring
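    # For example, the Service fronting Prometheus is expected to select pods
    # using exactly the labels above (a sketch; the actual manifest lives in
    # this chart's templates):
    #
    #   apiVersion: v1
    #   kind: Service
    #   metadata:
    #     name: prometheus-k8s
    #     namespace: monitoring
    #   spec:
    #     selector:
    #       app.kubernetes.io/component: prometheus
    #       app.kubernetes.io/instance: k8s
    #       app.kubernetes.io/name: prometheus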
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/worker
                  operator: Exists
            weight: 100
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/storage
                  operator: DoesNotExist
            weight: 100
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                      - prometheus
              topologyKey: kubernetes.io/hostname
            weight: 10
    # List of namespaces that Prometheus is to monitor.
    # Used to create Roles and RoleBindings.
    # Type: list
    monitor_namespaces:
      - alerting
      - default
      # - ceph
      - grafana
      - monitoring
      # - kube-dashboard
      # - kube-metrics
      - kube-policy
      - kube-system
      - logging
      # - mariadb
      # - olm
      # - operators

    # Deploy a Kyverno generate policy that creates the Role and RoleBinding
    # for the Prometheus service account, so it can monitor new and existing
    # namespaces. A sketch of the expected per-namespace Role follows.
    kyverno_role_policy: true
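    # A minimal sketch of the per-namespace Role such a policy would generate
    # (the name is an illustrative assumption; the rules mirror the standard
    # prometheus-operator RBAC for service discovery):
    #
    #   apiVersion: rbac.authorization.k8s.io/v1
    #   kind: Role
    #   metadata:
    #     name: prometheus-k8s        # assumed name
    #     namespace: <target-namespace>
    #   rules:
    #     - apiGroups: [""]
    #       resources: ["services", "endpoints", "pods"]
    #       verbs: ["get", "list", "watch"]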
    storage:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 40Gi

    # Additional settings for Prometheus.
    # See: https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.PrometheusSpec
    # Type: dict
    additional:
      # remoteWrite:
      #   - name: mimir
      #     url: http://mimir-gateway.metrics.svc.cluster.local/api/v1/push
      retention: 24h
      retentionSize: 20GB
      ruleSelector:
        matchLabels:
          role: alert-rules
  prometheus_adaptor:

    image:
      name: registry.k8s.io/prometheus-adapter/prometheus-adapter
      tag: 'v0.11.1'

    labels:
      app.kubernetes.io/component: metrics-adapter
      app.kubernetes.io/instance: main
      app.kubernetes.io/name: prometheus-adapter

    namespace: monitoring

    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/worker
                  operator: Exists
            weight: 100
          - preference:
              matchExpressions:
                - key: node-role.kubernetes.io/storage
                  operator: DoesNotExist
            weight: 100
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                  - key: app.kubernetes.io/name
                    operator: In
                    values:
                      - prometheus
              topologyKey: kubernetes.io/hostname
            weight: 10
  thanos:

    image:
      name: thanosio/thanos
      tag: v0.32.3

    # Prometheus Thanos sidecar.
    # See: https://thanos.io/tip/components/sidecar.md/
    # Type: Dict
    sidecar:
      enabled: true
      config:
        type: S3
        config:
          bucket: "thanos-metrics"
          endpoint: "rook-ceph-rgw-earth.ceph.svc:80"
          access_key: "7J5NM2MNCDB4T4Y9OKJ5"
          secret_key: "t9r69RzZdWEBL3NCKiUIpDk6j5625xc6HucusiGG"
          insecure: true
  additions:

    ceph:
      enabled: true
      namespace: ceph
      PrometheusRules: true
      ServiceMonitor:
        selector:
          matchLabels:
            app: rook-ceph-mgr

    # Add a sidecar to the Grafana pod to load dashboards from ConfigMaps.
    # Any ConfigMap carrying the label below is collected; see the sketch that follows.
    dashboard_sidecar:
      enabled: true
      image:
        name: ghcr.io/kiwigrid/k8s-sidecar
        tag: '1.24.5'
      label_name: grafana_dashboard
      label_value: "1"
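    # A minimal sketch of a ConfigMap the sidecar would pick up (the name and
    # dashboard JSON are illustrative assumptions; only the label must match
    # label_name/label_value above):
    #
    #   apiVersion: v1
    #   kind: ConfigMap
    #   metadata:
    #     name: my-dashboard          # assumed name
    #     labels:
    #       grafana_dashboard: "1"
    #   data:
    #     my-dashboard.json: |
    #       { "title": "My Dashboard", "panels": [] }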
    network_policy:
      enabled: true

      # Network Policies to apply. These policies are automatically built using the
      # values below. What you would find under the path root.spec belongs here.
      #
      # Do:
      #   - Define 'Ingress'.
      #   - Define 'Egress'.
      #   - Ensure that the name matches the item name from values.yaml, i.e. nfc_monitoring.{item_name}.
      #     For Prometheus the item name is 'prometheus'. This value is used to select items pertaining
      #     to that item from values.yaml, for example the labels and namespace.
      # Don't:
      #   - Define 'podSelector', as this is already included using the selector labels.
      #
      # A sketch of how an entry renders into a NetworkPolicy follows.
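      # For illustration, the 'prometheus' entry below is expected to render roughly
      # as the following NetworkPolicy (metadata names are assumptions; the
      # podSelector is filled in from nfc_monitoring.prometheus.labels):
      #
      #   apiVersion: networking.k8s.io/v1
      #   kind: NetworkPolicy
      #   metadata:
      #     name: prometheus           # assumed name
      #     namespace: monitoring      # from nfc_monitoring.prometheus.namespace
      #   spec:
      #     podSelector:
      #       matchLabels:
      #         app.kubernetes.io/component: prometheus
      #         app.kubernetes.io/instance: k8s
      #         app.kubernetes.io/name: prometheus
      #     egress: []                 # from policy.egress below
      #     ingress: []                # from policy.ingress below
      #     policyTypes:
      #       - Egress
      #       - Ingress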
      policies:

        ### SoF Network Policy: Prometheus ###
        - name: prometheus
          policy:
            egress: # TODO: Add further restrictions to egress. Is a variable lookup possible to obtain values?
              # - {}
              - to: # Alert Manager
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: alerting
                    podSelector:
                      matchLabels:
                        app.kubernetes.io/instance: main
                        app.kubernetes.io/component: alert-router
                        app.kubernetes.io/name: alertmanager
                ports:
                  - port: 9093
                    protocol: TCP
              - to: # Ceph
                  - ipBlock:
                      cidr: 172.16.10.0/24
                ports:
                  - port: 9283
                    protocol: TCP
              - to: # Grafana
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: grafana
                    podSelector:
                      matchLabels:
                        app.kubernetes.io/component: graphing
                        app.kubernetes.io/instance: k8s
                        app.kubernetes.io/name: grafana
                ports:
                  - port: 3000
                    protocol: TCP
              - to: # Grafana Agent
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: monitoring
                    podSelector:
                      matchLabels:
                        app.kubernetes.io/instance: k8s
                        app.kubernetes.io/component: exporter
                        app.kubernetes.io/name: grafana-agent
                ports:
                  - port: 12345
                    protocol: TCP
              - to: # Kube DNS
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: kube-system
                    podSelector:
                      matchLabels:
                        k8s-app: kube-dns
                ports:
                  - port: 53
                    protocol: TCP
                  - port: 53
                    protocol: UDP
              - to:
                  - podSelector:
                      matchLabels:
                        app.kubernetes.io/name: prometheus
                    # namespaceSelector:
                    #   matchLabels:
                    #     kubernetes.io/metadata.name: monitoring
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: metrics
                ports: []
              - {} # TODO: Temporary rule: allow all. This rule MUST be removed once egress has been refactored.
            ingress:
              - from:
                  - podSelector:
                      matchLabels:
                        app.kubernetes.io/name: prometheus
                    # namespaceSelector:
                    #   matchLabels:
                    #     kubernetes.io/metadata.name: monitoring
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: metrics
                ports: []
                # - port: 8080
                #   protocol: TCP
                # - port: 9090
                #   protocol: TCP
                # - port: 10901
                #   protocol: TCP
              - from:
                  - podSelector:
                      matchLabels:
                        app.kubernetes.io/name: grafana
                    namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: grafana
                  - podSelector:
                      matchLabels:
                        app.kubernetes.io/name: prometheus-adapter
                    namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: monitoring
                ports:
                  - port: 9090
                    protocol: TCP
              - from: []
                ports: []
            policyTypes:
              - Egress
              - Ingress
        ### SoF Network Policy: Grafana ###
        - name: grafana
          policy:
            egress:
              - to:
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: alerting
                    podSelector:
                      matchLabels:
                        app.kubernetes.io/instance: main
                        app.kubernetes.io/component: alert-router
                        app.kubernetes.io/name: alertmanager
                ports:
                  - port: 9093
                    protocol: TCP
              - to:
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: logging
                    podSelector:
                      matchLabels:
                        app.kubernetes.io/component: gateway
                        app.kubernetes.io/instance: loki
                        app.kubernetes.io/name: loki
                ports:
                  - port: 80 # Service port
                    protocol: TCP
                  - port: 8080 # Pod port
                    protocol: TCP
              - to:
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: monitoring
                    podSelector:
                      matchLabels:
                        app.kubernetes.io/component: prometheus
                        app.kubernetes.io/instance: k8s
                        app.kubernetes.io/name: prometheus
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: metrics
                    podSelector:
                      matchLabels:
                        app.kubernetes.io/component: query-layer
                        app.kubernetes.io/instance: thanos-query
                        app.kubernetes.io/name: thanos-query
                ports:
                  - port: 9090
                    protocol: TCP
              - to: [] # Requires internet access for plugin and dashboard downloads.
                ports:
                  - port: 443
                    protocol: TCP
              - to: # Kube DNS
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: kube-system
                    podSelector:
                      matchLabels:
                        k8s-app: kube-dns
                ports:
                  - port: 53
                    protocol: TCP
                  - port: 53
                    protocol: UDP
            ingress:
              - from: []
                ports:
                  - port: 3000
                    protocol: TCP
            policyTypes:
              - Egress
              - Ingress
        ### SoF Network Policy: Grafana Agent ###
        - name: grafana_agent
          policy:
            egress:
              - to: # Logging
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: logging
                    podSelector:
                      matchLabels:
                        app.kubernetes.io/component: gateway
                        app.kubernetes.io/instance: loki
                        app.kubernetes.io/name: loki
                ports:
                  - port: 80
                    protocol: TCP
              - to: # Kube DNS
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: kube-system
                    podSelector:
                      matchLabels:
                        k8s-app: kube-dns
                ports:
                  - port: 53
                    protocol: TCP
                  - port: 53
                    protocol: UDP
            ingress:
              - from:
                  - namespaceSelector:
                      matchLabels:
                        kubernetes.io/metadata.name: monitoring
                    podSelector:
                      matchLabels:
                        app.kubernetes.io/component: prometheus
                        app.kubernetes.io/instance: k8s
                        app.kubernetes.io/name: prometheus
                ports:
                  - port: 12345
                    protocol: TCP
            policyTypes:
              - Egress
              - Ingress
  loki_instance:
    image:
      name: grafana/loki
      tag: 2.7.4
      # tag: 2.9.0
    namespace: loki

  oncall_instance:
    image:
      name: grafana/oncall
      tag: v1.1.40

# oncall:
#   # image:
#   #   # Grafana OnCall docker image repository
#   #   repository: grafana/oncall
#   #   tag: v1.1.38
#   #   pullPolicy: Always
#   service:
#     enabled: false
#     type: LoadBalancer
#     port: 8080
#     annotations: {}
#   engine:
#     replicaCount: 1
#     resources:
#       limits:
#         cpu: 100m
#         memory: 128Mi
#       requests:
#         cpu: 100m
#         memory: 128Mi
#   celery:
#     replicaCount: 1
#     resources:
#       limits:
#         cpu: 100m
#         memory: 128Mi
#       requests:
#         cpu: 100m
#         memory: 128Mi
#   database:
#     type: none