76 Commits

Author SHA1 Message Date
d25cee3c8c Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!28
2025-02-26 13:09:42 +00:00
05bd0cd6d4 chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "6f8dfcba0b25313b59bc17b4c99d674fcedd207a)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "224ef831571458ad433a0143eec00df0f7e8b409)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out 224ef831571458ad433a0143eec00df0f7e8b409

MR !28
2025-02-26 13:08:57 +00:00
ac6b7ef31e Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!25
2024-08-19 07:25:36 +00:00
6b44c1df69 chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "58ffcabbfb503af3e57d9cb3ab43931b23dc4cd8)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "6f8dfcba0b25313b59bc17b4c99d674fcedd207a)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out 6f8dfcba0b25313b59bc17b4c99d674fcedd207a

MR !25
2024-08-19 07:24:49 +00:00
d5deb022c5 Merge branch 'automated-tasks' into 'development'
chore(website-template): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!24
2024-08-01 06:09:00 +00:00
21c5e87c85 chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "673441f83a7d943434252ee23899e3572cdfb141)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "58ffcabbfb503af3e57d9cb3ab43931b23dc4cd8)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out 58ffcabbfb503af3e57d9cb3ab43931b23dc4cd8

MR !24
2024-08-01 06:08:17 +00:00
6ac6ced4bb chore(git): updated submodule website-template
Automation Data:
{
    "branch": "development",
    "current_commit": "92c4b16a14524e7b5b18171d4e21b72676c36fbf)",
    "name": "website-template",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/website-template",
    "remote_head": "8735f623dc5e1d9c9e46e50db03b4c41cb3d1efd)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/infrastructure/website-template.git"
}

Changes: Submodule path website-template: checked out 8735f623dc5e1d9c9e46e50db03b4c41cb3d1efd

MR !24
2024-06-30 18:03:35 +00:00
39f26c8f82 Merge branch 'automated-tasks' into 'development'
chore(website-template): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!23
2024-06-30 17:48:54 +00:00
13e60d0a76 chore(git): updated submodule website-template
Automation Data:
{
    "branch": "development",
    "current_commit": "f5a82d3604faca56756eec91acee28ff89defd1d)",
    "name": "website-template",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/website-template",
    "remote_head": "92c4b16a14524e7b5b18171d4e21b72676c36fbf)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/infrastructure/website-template.git"
}

Changes: Submodule path website-template: checked out 92c4b16a14524e7b5b18171d4e21b72676c36fbf

MR !23
2024-06-30 17:48:10 +00:00
22d3308464 Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!22
2024-06-30 17:46:48 +00:00
a83648e2ac chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "a24f352ca3d82b8d0f02f5db20173fe2c3f71a4a)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "673441f83a7d943434252ee23899e3572cdfb141)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out 673441f83a7d943434252ee23899e3572cdfb141

MR !22
2024-06-30 17:46:02 +00:00
b01c6bbb06 Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!21
2024-03-16 11:35:32 +00:00
d6e21083c9 chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "9afa68d1f3849e491fa8ca034749388808531b74)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "a24f352ca3d82b8d0f02f5db20173fe2c3f71a4a)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out a24f352ca3d82b8d0f02f5db20173fe2c3f71a4a

MR !21
2024-03-16 11:34:52 +00:00
bfa20b6a09 Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!20
2024-03-14 12:47:17 +00:00
d6cf67b930 chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "41eeb7badd582175b371cd4a5b2192decbcb0210)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "9afa68d1f3849e491fa8ca034749388808531b74)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out 9afa68d1f3849e491fa8ca034749388808531b74

MR !20
2024-03-14 12:46:20 +00:00
5aea6f620e Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!19
2024-02-25 09:32:31 +00:00
3a31680ab5 chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "6f80ea3af7fdc64e9998820a8800c288d7facbc6)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "41eeb7badd582175b371cd4a5b2192decbcb0210)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out 41eeb7badd582175b371cd4a5b2192decbcb0210

MR !19
2024-02-25 09:31:52 +00:00
e22e98fd08 Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!18
2024-02-24 06:31:22 +00:00
78ff4066ea chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "4f65bc1367585146490637dfc7c57c987216e652)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "6f80ea3af7fdc64e9998820a8800c288d7facbc6)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out 6f80ea3af7fdc64e9998820a8800c288d7facbc6

MR !18
2024-02-24 06:30:29 +00:00
3b33cf43d8 Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!17
2024-02-23 09:07:43 +00:00
e5bc593611 chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "8094694d43449f1d17b763e215485b2950e6b6b4)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "4f65bc1367585146490637dfc7c57c987216e652)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out 4f65bc1367585146490637dfc7c57c987216e652

MR !17
2024-02-23 09:06:59 +00:00
e4a98648e2 Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!16
2024-02-23 02:52:07 +00:00
8d1ad238e4 chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "34c81c98494b5ce448f4da4e645952439c897906)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "8094694d43449f1d17b763e215485b2950e6b6b4)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out 8094694d43449f1d17b763e215485b2950e6b6b4

MR !16
2024-02-23 02:51:26 +00:00
2bfa45d5a3 Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!15
2024-02-22 09:54:06 +00:00
a33a0514d7 chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "c7c966bbee4fefc044d4c58e60dd5f10ec63862b)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "34c81c98494b5ce448f4da4e645952439c897906)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out 34c81c98494b5ce448f4da4e645952439c897906

MR !15
2024-02-22 09:53:24 +00:00
ea59c866d6 Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!14
2024-02-22 08:35:20 +00:00
2bdde14a5a chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "74ac15925c75ba0178ae21932b02b6a90a9169c3)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "c7c966bbee4fefc044d4c58e60dd5f10ec63862b)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out c7c966bbee4fefc044d4c58e60dd5f10ec63862b

MR !14
2024-02-22 08:34:39 +00:00
07be00f24c Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!13
2024-02-22 08:05:01 +00:00
452087a111 chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "e046f9ea49a617ce91ff2eda53b897f798dfb810)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "74ac15925c75ba0178ae21932b02b6a90a9169c3)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out 74ac15925c75ba0178ae21932b02b6a90a9169c3

MR !13
2024-02-22 08:04:18 +00:00
df7917aef8 Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!12
2024-02-22 06:31:22 +00:00
dbacb1794c chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "d29064f1490073599518b629c7bf6585b48c8736)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "e046f9ea49a617ce91ff2eda53b897f798dfb810)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out e046f9ea49a617ce91ff2eda53b897f798dfb810

MR !12
2024-02-22 06:30:38 +00:00
eecb42a4e5 Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!11
2024-02-17 04:50:21 +00:00
d53d28314e chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "02252db664a428e83fb9ae24662b56b53e615989)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "d29064f1490073599518b629c7bf6585b48c8736)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out d29064f1490073599518b629c7bf6585b48c8736

MR !11
2024-02-17 04:49:41 +00:00
0a0d37e44d Merge branch 'automated-tasks' into 'development'
chore(gitlab-ci): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!10
2024-02-15 13:49:09 +00:00
65e247958c chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "bea398200f838736c6111a399478667df37435cb)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "02252db664a428e83fb9ae24662b56b53e615989)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out 02252db664a428e83fb9ae24662b56b53e615989

MR !10
2024-02-15 13:48:27 +00:00
fce97d5aa2 Merge branch 'automated-tasks' into 'development'
chore(website-template): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!9
2024-02-08 05:32:49 +00:00
b2d3cad87d chore(git): updated submodule website-template
Automation Data:
{
    "branch": "development",
    "current_commit": "2bcc17652babd4027e7245c6367841e2580ec317)",
    "name": "website-template",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/website-template",
    "remote_head": "f5a82d3604faca56756eec91acee28ff89defd1d)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/infrastructure/website-template.git"
}

Changes: Submodule path website-template: checked out f5a82d3604faca56756eec91acee28ff89defd1d

MR !9
2024-02-08 05:32:09 +00:00
Jon
496c7637c3 Merge branch 'feat-refine-deployment-options' into 'development'
feat: refine deployment options

See merge request nofusscomputing/projects/kubernetes_monitoring!8
2024-02-07 08:32:40 +00:00
Jon
99e503324d feat(grafana_dashboard): add cert manager dashboard
!8
2024-02-05 16:01:08 +09:30
Jon
486f2c4728 refactor(service_monitor): use job name prometheus for prometheus
required for built in grafana dashboards to work

!8
2024-02-05 15:11:49 +09:30
Jon
a2c3daa44e fix(service_monitor): correct syntax for calico
!8
2024-02-05 14:17:27 +09:30
Jon
faf4abf6b3 feat(grafana_dashboard): add calico dashboard
!8
2024-02-05 14:16:28 +09:30
Jon
cd2b99dd3d fix(service_monitor): dont deploy calico unless enabled
!8
2024-02-05 14:15:53 +09:30
Jon
efd6d15dc4 fix(role_binding): use namespace lookup to build role bindings
!8
2024-02-05 13:37:10 +09:30
Jon
f08cba1dfb feat(grafana): update to latest version 10.3.1
!8
2024-02-05 13:36:30 +09:30
Jon
49bf414caa refactor(network_policy): move network policy to template
!8
2024-02-05 11:31:39 +09:30
Jon
36ee3a10ff feat(role): dynamically add roles to all available namespaces
!8
2024-02-05 10:38:28 +09:30
Jon
6a20b69910 docs(values): note that remote write is configured separately
!8
2024-02-05 10:25:52 +09:30
Jon
1fd5e49247 refactor(prometheus_rule): correct common rules name
!8
2024-02-04 20:26:39 +09:30
Jon
5323377852 fix(prometheus): use alertmanager instance name
!8
2024-02-04 20:24:46 +09:30
Jon
4e8f25ec3d refactor(prometheus_rule): move watchdog and info inhibitor to common rules file
these rules are for all metrics rules

!8
2024-02-04 20:22:58 +09:30
Jon
cb12f338f1 fix(prometheus_rule): don't deploy loki rules if not configured
!8
2024-02-04 20:12:45 +09:30
Jon
39af78c6ea fix(prometheus_rule): use instance name instead of hard coded value
!8
2024-02-04 20:05:28 +09:30
Jon
73f25cfaa2 feat(prometheus_rule): add node exporter absent alert for ALL nodes
!8
2024-02-04 19:58:15 +09:30
Jon
beaa4f4896 fix(calico): use the operator created ns for monitoring
!8
2024-02-04 17:33:16 +09:30
Jon
e8b4b5a00b fix(ingress): move from values to template for dynomagic setup
!8
2024-02-04 17:07:31 +09:30
Jon
490e497d15 refactor: remove k8s and use release name for instance
!8
2024-02-04 17:06:39 +09:30
Jon
38f08985f5 fix(values): fix service_monitor variable
!8
2024-02-04 16:07:49 +09:30
Jon
c7746122cd fix(prometheus): service monitor missing close
!8
2024-02-04 15:56:50 +09:30
Jon
57a1706590 chore: set values.yaml to have sensible defaults
!8
2024-02-04 15:55:16 +09:30
Jon
bdb555a4b5 feat: turn off k8s non-metrics server deployed features
!8
2024-02-04 15:51:28 +09:30
Jon
bdb3a09c2b feat(kube_monitor_proxy): don't deploy if not enabled
!8
2024-02-04 15:49:43 +09:30
Jon
9c35a4d140 feat(kube_state_metrics): don't deploy if not enabled
!8
2024-02-04 15:38:19 +09:30
Jon
bc4d72ff8e feat(service_monitor): don't deploy if not enabled
!8
2024-02-04 15:33:50 +09:30
Jon
e13d55e61e feat(grafana_agent): not enable if not configured
!8
2024-02-04 15:08:53 +09:30
Jon
257da9cd38 fix(prometheus): configurable image and tag
!8
2024-02-04 15:07:13 +09:30
Jon
106f2e6ec8 feat(prometheus): configurable remote write
!8
2024-02-04 15:06:44 +09:30
Jon
f1c54567a7 feat(prometheus): configurable replicas
!8
2024-02-04 15:06:16 +09:30
Jon
cd2bceec3a feat(thanos_sidecar): if enabled must also be configured for deploy to occur
!8
2024-02-04 15:04:24 +09:30
Jon
18649086b5 feat(alertmanager): instance name set to pod name
!8
2024-02-04 15:02:34 +09:30
Jon
e0cb8f57e2 feat(grafana): enabled/disabled configurable
!8
2024-02-04 15:02:00 +09:30
Jon
c5bb46f48a feat(alertmanager): configurable replicas
!8
2024-02-04 14:58:08 +09:30
103a529184 Merge branch 'automated-tasks' into 'development'
chore(website-template): Automated update of git sub-module

See merge request nofusscomputing/projects/kubernetes_monitoring!7
2024-02-02 13:12:16 +00:00
51a187bb75 chore(git): updated submodule gitlab-ci
Automation Data:
{
    "branch": "development",
    "current_commit": "a5a9fa44374107657b2587ce52607d96a825be56)",
    "name": "gitlab-ci",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/gitlab-ci",
    "remote_head": "bea398200f838736c6111a399478667df37435cb)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/projects/gitlab-ci.git"
}

Changes: Submodule path gitlab-ci: checked out bea398200f838736c6111a399478667df37435cb

MR !7
2024-02-02 13:11:36 +00:00
f613ba29cd chore(git): updated submodule website-template
Automation Data:
{
    "branch": "development",
    "current_commit": "992b54805b8b6c78a3d2a5ea7de71c7be2b070c8)",
    "name": "website-template",
    "path": "/builds/nofusscomputing/projects/kubernetes_monitoring/_automation_/website-template",
    "remote_head": "2bcc17652babd4027e7245c6367841e2580ec317)",
    "remote_name": "origin",
    "url": "https://gitlab.com/nofusscomputing/infrastructure/website-template.git"
}

Changes: Submodule path website-template: checked out 2bcc17652babd4027e7245c6367841e2580ec317

MR !7
2024-02-02 13:11:27 +00:00
Jon
3eafca6c41 feat(nfc_automation): add automation file
!5
2024-02-02 22:32:19 +09:30
96 changed files with 961 additions and 565 deletions

10
.nfc_automation.yaml Normal file
View File

@ -0,0 +1,10 @@
---
role_git_conf:
gitlab:
submodule_branch: "development"
default_branch: development
mr_labels: ~"type::automation" ~"impact::0" ~"priority::0"
auto_merge: true
merge_request:
patch_labels: '~"code review::not started"'

View File

@ -1,9 +1,12 @@
{{ if false }}
# already on k3s
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -17,3 +20,4 @@ spec:
namespace: monitoring
version: v1beta1
versionPriority: 100
{{ end }}

View File

@ -4,10 +4,11 @@ kind: Alertmanager
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: main
name: {{ $.Release.Name }}
namespace: {{ .Values.nfc_monitoring.alert_manager.namespace | quote }}
spec:
image: "{{ .Values.nfc_monitoring.alert_manager.image.name }}:{{ .Values.nfc_monitoring.alert_manager.image.tag }}"
@ -16,12 +17,12 @@ spec:
podMetadata:
labels:
app.kubernetes.io/component: alert-router
app.kubernetes.io/instance: main
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
replicas: 3
replicas: {{ .Values.nfc_monitoring.alert_manager.replicas }}
resources:
limits:
cpu: 100m

View File

@ -15,6 +15,7 @@ metadata:
question.
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -32,12 +33,13 @@ spec:
synchronize: true
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
namespace: "{{ `{{` }}request.object.metadata.name }}"
data:
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 14 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -15,6 +15,7 @@ metadata:
question.
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -32,20 +33,21 @@ spec:
synchronize: true
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
namespace: "{{ `{{` }}request.object.metadata.name }}"
data:
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 14 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
subjects:
- kind: ServiceAccount
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}"
{{ end }}

View File

@ -5,6 +5,7 @@ metadata:
name: grafana-agent
labels:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -1,9 +1,11 @@
{{ if false }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -22,3 +24,6 @@ rules:
- get
- list
- watch
# Already exists on k3s
{{ end }}

View File

@ -4,6 +4,7 @@ kind: ClusterRoleBinding
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -5,6 +5,7 @@ metadata:
name: hpa-controller-custom-metrics
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -4,6 +4,7 @@ kind: ClusterRoleBinding
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -1,3 +1,4 @@
{{ if .Values.nfc_monitoring.grafana.enabled -}}
{{ if .Values.nfc_monitoring.additions.dashboard_sidecar.enabled -}}
---
apiVersion: rbac.authorization.k8s.io/v1
@ -5,6 +6,7 @@ kind: ClusterRole
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -14,3 +16,4 @@ rules:
resources: ["configmaps"]
verbs: ["get", "watch", "list"]
{{- end }}
{{- end }}

View File

@ -1,3 +1,4 @@
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@ -5,7 +6,7 @@ metadata:
name: kube-monitor-proxy
labels:
app.kubernetes.io/component: proxy
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-monitor-proxy
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -19,3 +20,5 @@ rules:
resources:
- subjectaccessreviews
verbs: ["create"]
{{ end }}

View File

@ -1,10 +1,11 @@
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -18,3 +19,5 @@ subjects:
- kind: ServiceAccount
name: kube-state-metrics
namespace: monitoring
{{ end }}

View File

@ -1,10 +1,11 @@
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -130,3 +131,5 @@ rules:
verbs:
- list
- watch
{{ end }}

View File

@ -4,6 +4,7 @@ kind: ClusterRole
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -4,6 +4,7 @@ kind: ClusterRole
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -4,10 +4,11 @@ kind: ClusterRole
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
rules:
- apiGroups:
- ""

View File

@ -4,6 +4,7 @@ kind: ClusterRoleBinding
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -1,3 +1,4 @@
{{ if .Values.nfc_monitoring.grafana.enabled -}}
{{ if .Values.nfc_monitoring.additions.dashboard_sidecar.enabled -}}
---
kind: ClusterRoleBinding
@ -5,6 +6,7 @@ apiVersion: rbac.authorization.k8s.io/v1
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -18,3 +20,4 @@ subjects:
name: grafana
namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}"
{{- end }}
{{- end }}

View File

@ -5,7 +5,7 @@ metadata:
name: kube-monitor-proxy
labels:
app.kubernetes.io/component: proxy
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-monitor-proxy
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}

View File

@ -4,15 +4,16 @@ kind: ClusterRoleBinding
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
subjects:
- kind: ServiceAccount
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}

View File

@ -1,3 +1,4 @@
{{ if .Values.nfc_monitoring.grafana.enabled -}}
---
apiVersion: v1
kind: ConfigMap
@ -6,6 +7,7 @@ metadata:
namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}"
labels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -123,4 +125,4 @@ data:
}
}
---
{{- end }}

View File

@ -4,6 +4,7 @@ kind: ConfigMap
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -15,7 +16,7 @@ data:
wal_directory: /tmp/wal
{{ if .Values.nfc_monitoring.loki.config }}
logs:
positions_directory: "/tmp"
@ -234,7 +235,7 @@ data:
- target_label: node
source_labels:
- __meta_kubernetes_pod_node_name
{{ end }}
integrations:
@ -262,6 +263,8 @@ data:
syslog_server.yaml: |
# REF: https://grafana.com/docs/loki/latest/send-data/promtail/configuration/#example-syslog-config
{{ if .Values.nfc_monitoring.loki.config }}
server:
http_listen_port: 9080
grpc_listen_port: 0
@ -281,3 +284,5 @@ data:
relabel_configs:
- source_labels: ['__syslog_message_hostname']
target_label: 'host'
{{ end }}

View File

@ -1,3 +1,4 @@
{{ if .Values.nfc_monitoring.grafana.enabled -}}
{{ if .Values.nfc_monitoring.additions.dashboard_sidecar.enabled -}}
---
# Provisioning config
@ -6,6 +7,7 @@ kind: ConfigMap
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -27,3 +29,4 @@ data:
path: /var/lib/grafana/dashboards
foldersFromFilesStructure: true
{{- end }}
{{- end }}

View File

@ -4,6 +4,7 @@ kind: ConfigMap
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -4,6 +4,7 @@ kind: DaemonSet
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -16,6 +17,7 @@ spec:
selector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -26,6 +28,7 @@ spec:
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 8 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -1,10 +1,11 @@
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app.kubernetes.io/component: proxy
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-monitor-proxy
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -19,7 +20,7 @@ spec:
selector:
matchLabels:
app.kubernetes.io/component: proxy
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-monitor-proxy
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -28,7 +29,7 @@ spec:
metadata:
labels:
app.kubernetes.io/component: proxy
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-monitor-proxy
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -134,3 +135,5 @@ spec:
serviceAccountName: kube-monitor-proxy
tolerations:
- operator: Exists
{{ end }}

View File

@ -1,10 +1,11 @@
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -16,7 +17,7 @@ spec:
selector:
matchLabels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -26,7 +27,7 @@ spec:
kubectl.kubernetes.io/default-container: kube-state-metrics
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -110,3 +111,5 @@ spec:
nodeSelector:
kubernetes.io/os: linux
serviceAccountName: kube-state-metrics
{{ end }}

View File

@ -4,6 +4,7 @@ kind: Deployment
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -14,6 +15,7 @@ spec:
selector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -25,6 +27,7 @@ spec:
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 8 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -1,9 +1,11 @@
---
{{ if .Values.nfc_monitoring.grafana.enabled -}}
apiVersion: grafana.integreatly.org/v1beta1
kind: Grafana
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -16,6 +18,7 @@ spec:
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 8 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -24,6 +27,7 @@ spec:
selector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 10 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
strategy:
@ -34,6 +38,7 @@ spec:
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 12 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -91,7 +96,7 @@ spec:
name: dashboards
- image: "{{ .Values.nfc_monitoring.additions.dashboard_sidecar.image.name }}:{{ .Values.nfc_monitoring.additions.dashboard_sidecar.image.tag}}"
name: k8s-sidecar
name: sidecar
env:
- name: LABEL
value: "{{ .Values.nfc_monitoring.additions.dashboard_sidecar.label_name }}"
@ -147,3 +152,5 @@ spec:
resources:
requests:
storage: "5Gi"
{{- end }}

View File

@ -1,3 +1,4 @@
{{ if .Values.nfc_monitoring.grafana.enabled -}}
---
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
@ -11,8 +12,10 @@ spec:
instanceSelector:
matchLabels:
app.kubernetes.io/component: graphing
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: grafana
grafanaCom:
id: 9578
revision: 4 # as @ 19-09-23
revision: 4 # as @ 19-09-23
{{- end }}

View File

@ -1,3 +1,4 @@
{{ if .Values.nfc_monitoring.grafana.enabled -}}
---
{{- if .Values.nfc_monitoring.additions.ceph.enabled | default false -}}
apiVersion: grafana.integreatly.org/v1beta1
@ -12,10 +13,11 @@ spec:
instanceSelector:
matchLabels:
app.kubernetes.io/component: graphing
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: grafana
grafanaCom:
id: 2842
revision: 17 # as @ 19-09-23
{{- end -}}
{{- end -}}
{{- end }}

View File

@ -0,0 +1,25 @@
---
{{ if .Values.nfc_monitoring.grafana.enabled -}}
{{- if eq .Values.nfc_monitoring.kubernetes.networking "calico" }}
{{- if .Values.nfc_monitoring.prometheus.service_monitor.calico }}
# GrafanaDashboard `calico-felix`, imported from grafana.com.
# Rendered only when Grafana is enabled, the cluster networking value is
# "calico" and the calico service monitor is switched on.
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
  namespace: {{ .Values.nfc_monitoring.grafana.namespace }}
  name: calico-felix
spec:
  # Attach to the Grafana instance carrying this release's labels.
  instanceSelector:
    matchLabels:
      app.kubernetes.io/name: grafana
      app.kubernetes.io/component: graphing
      app.kubernetes.io/instance: {{ $.Release.Name }}
  grafanaCom:
    revision: 5  # as @ 2020-05-04T16:47:08
    id: 12175
  folder: No Fuss Monitoring
  allowCrossNamespaceImport: true
  resyncPeriod: 1d
{{- end }}
{{ end }}
{{ end }}

View File

@ -0,0 +1,21 @@
---
{{ if .Values.nfc_monitoring.grafana.enabled -}}
{{- if .Values.nfc_monitoring.grafana.dashboards.cert_manager | default false }}
# GrafanaDashboard for cert-manager, imported from grafana.com.
# Rendered only when Grafana is enabled AND the cert_manager dashboard toggle
# is set; the grafana.enabled guard matches the other dashboard templates.
# NOTE(review): this resource was previously named `node-exporter`, which
# looks like a copy/paste leftover and risks clashing with the node exporter
# dashboard; grafana.com id 11001 is assumed to be the cert-manager
# dashboard - confirm.
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
  name: cert-manager
  namespace: {{ .Values.nfc_monitoring.grafana.namespace }}
spec:
  allowCrossNamespaceImport: true
  folder: No Fuss Monitoring
  resyncPeriod: 1d
  # Attach to the Grafana instance carrying this release's labels.
  instanceSelector:
    matchLabels:
      app.kubernetes.io/component: graphing
      app.kubernetes.io/instance: {{ $.Release.Name }}
      app.kubernetes.io/name: grafana
  grafanaCom:
    id: 11001
    revision: 1  # as @ 2019-10-16T13:48:56
{{- end }}
{{ end }}

View File

@ -1,4 +1,5 @@
---
{{ if .Values.nfc_monitoring.grafana.enabled -}}
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
@ -11,9 +12,10 @@ spec:
instanceSelector:
matchLabels:
app.kubernetes.io/component: graphing
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: grafana
{{ $Dashboard := .Files.Get "files/dashboard-summary.json" | fromJson }}
json: >-
{{ $Dashboard | toRawJson }}
{{- end }}

View File

@ -1,4 +1,5 @@
---
{{ if .Values.nfc_monitoring.grafana.enabled -}}
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
@ -11,8 +12,10 @@ spec:
instanceSelector:
matchLabels:
app.kubernetes.io/component: graphing
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: grafana
grafanaCom:
id: 1860
revision: 32 # as @ 19-09-23
revision: 32 # as @ 19-09-23
{{- end }}

View File

@ -1,4 +1,5 @@
---
{{ if .Values.nfc_monitoring.grafana.enabled -}}
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDatasourceList
items:
@ -10,7 +11,7 @@ items:
namespace: "{{ $.Values.nfc_monitoring.grafana.namespace }}"
labels:
app.kubernetes.io/component: dashboard
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: grafana
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -24,3 +25,4 @@ items:
{{ toYaml . | nindent 8 }}
{{ end }}
{{- end }}

View File

@ -1,4 +1,4 @@
{{ if .Values.nfc_monitoring.alert_manager.ingress.enabled | default "false" -}}
{{ if .Values.nfc_monitoring.alert_manager.ingress.enabled -}}
---
apiVersion: networking.k8s.io/v1
kind: Ingress
@ -8,11 +8,26 @@ metadata:
annotations:
{{ toYaml $.Values.nfc_monitoring.alert_manager.ingress.annotations | nindent 4 }}
spec:
{{ toYaml $.Values.nfc_monitoring.alert_manager.ingress.spec | nindent 2 }}
tls:
- hosts:
- {{ .Values.nfc_monitoring.alert_manager.ingress.hostname }}
secretName: certificate-tls-alert-manager
rules:
- host: {{ .Values.nfc_monitoring.alert_manager.ingress.hostname }}
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: alertmanager-{{ $.Release.Name }}
port:
name: web
{{ end }}
{{ if .Values.nfc_monitoring.grafana.ingress.enabled | default "false" -}}
{{ if .Values.nfc_monitoring.grafana.enabled -}}
{{ if .Values.nfc_monitoring.grafana.ingress.enabled -}}
---
apiVersion: networking.k8s.io/v1
kind: Ingress
@ -22,9 +37,24 @@ metadata:
annotations:
{{ toYaml $.Values.nfc_monitoring.grafana.ingress.annotations | nindent 4 }}
spec:
{{ toYaml $.Values.nfc_monitoring.grafana.ingress.spec | nindent 2 }}
tls:
- hosts:
- {{ .Values.nfc_monitoring.grafana.ingress.hostname }}
secretName: certificate-tls-grafana
rules:
- host: {{ .Values.nfc_monitoring.grafana.ingress.hostname }}
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: grafana
port:
name: grafana-http
{{ end }}
{{ end }}
{{- /*
  Test the flag directly. Piping it through `default "false"` replaces a
  false/unset value with the non-empty string "false", which `if` treats as
  true, so the Prometheus Ingress could never be switched off. This matches
  the alert-manager and grafana Ingress guards in this template.
*/ -}}
{{ if .Values.nfc_monitoring.prometheus.ingress.enabled -}}
---
@ -36,6 +66,20 @@ metadata:
annotations:
{{ toYaml $.Values.nfc_monitoring.prometheus.ingress.annotations | nindent 4 }}
spec:
{{ toYaml $.Values.nfc_monitoring.prometheus.ingress.spec | nindent 2 }}
tls:
- hosts:
- {{ .Values.nfc_monitoring.prometheus.ingress.hostname }}
secretName: certificate-tls-prometheus
rules:
- host: {{ .Values.nfc_monitoring.prometheus.ingress.hostname }}
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: prometheus-{{ $.Release.Name }}
port:
name: web
{{ end }}

View File

@ -1,31 +1,37 @@
# apiVersion: networking.k8s.io/v1
# kind: NetworkPolicy
# metadata:
# labels:
# app.kubernetes.io/component: exporter
# app.kubernetes.io/name: kube-state-metrics
# app.kubernetes.io/part-of: kube-prometheus
# app.kubernetes.io/version: 2.8.1
# name: kube-state-metrics
# namespace: monitoring
# spec:
# egress:
# - {}
# ingress:
# - from:
# - podSelector:
# matchLabels:
# app.kubernetes.io/name: prometheus
# ports:
# - port: 8443
# protocol: TCP
# - port: 9443
# protocol: TCP
# podSelector:
# matchLabels:
# app.kubernetes.io/component: exporter
# app.kubernetes.io/name: kube-state-metrics
# app.kubernetes.io/part-of: kube-prometheus
# policyTypes:
# - Egress
# - Ingress
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
---
# NetworkPolicy for the kube-state-metrics pods: all egress is allowed,
# ingress is limited to pods labelled `app.kubernetes.io/name: prometheus`
# on TCP 8443 and 9443.
# NOTE(review): the namespace and version label are hard-coded here; confirm
# they match where kube-state-metrics is actually deployed.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  labels:
    app.kubernetes.io/component: exporter
    app.kubernetes.io/instance: {{ $.Release.Name }}
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/part-of: {{ $.Chart.Name }}
    app.kubernetes.io/version: 2.8.1
  name: kube-state-metrics
  namespace: monitoring
spec:
  egress:
    - {}
  ingress:
    - from:
        - podSelector:
            matchLabels:
              app.kubernetes.io/name: prometheus
      ports:
        - port: 8443
          protocol: TCP
        - port: 9443
          protocol: TCP
  podSelector:
    matchLabels:
      app.kubernetes.io/component: exporter
      app.kubernetes.io/instance: {{ $.Release.Name }}
      app.kubernetes.io/name: kube-state-metrics
      # Must equal the pod template label set by the kube-state-metrics
      # Deployment (part-of: chart name); the former literal `kube-prometheus`
      # only matches when the chart itself carries that name.
      app.kubernetes.io/part-of: {{ $.Chart.Name }}
  policyTypes:
    - Egress
    - Ingress
{{ end }}

View File

@ -1,21 +1,313 @@
{{ range .Values.nfc_monitoring.network_policy.policies }}
{{- if .Values.nfc_monitoring.network_policy.enabled -}}
---
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
labels:
{{ toYaml (get $.Values.nfc_monitoring .name ).labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: {{ .name | replace "_" "-" }}
namespace: {{ (get $.Values.nfc_monitoring .name ).namespace }}
spec:
{{ toYaml .policy | nindent 2 }}
- name: prometheus
policy:
egress: # ToDo: add further restrictions to egress. is variable lookup possible to obtain values????
# - {}
- to: # Alert Manager
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: alerting
podSelector:
matchLabels:
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
ports:
- port: 9093
protocol: TCP
- to: # Ceph
- ipBlock:
cidr: 172.16.10.0/24
ports:
- port: 9283
protocol: TCP
- to: # Grafana
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: grafana
podSelector:
matchLabels:
app.kubernetes.io/component: graphing
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: grafana
ports:
- port: 3000
protocol: TCP
- to: # Grafana Agent
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: monitoring
podSelector:
matchLabels:
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/component: exporter
app.kubernetes.io/name: grafana-agent
ports:
- port: 12345
protocol: TCP
- to: # Kube DNS
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: kube-system
podSelector:
matchLabels:
k8s-app: kube-dns
ports:
- port: 53
protocol: TCP
- port: 53
protocol: UDP
- to:
- podSelector:
matchLabels:
app.kubernetes.io/name: prometheus
# namespaceSelector:
# matchLabels:
# kubernetes.io/metadata.name: monitoring
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: metrics
ports: []
- {} # ToDo: Temp rule: Allow All. this rule MUST be removed when egress has been refactored
ingress:
- from:
- podSelector:
matchLabels:
app.kubernetes.io/name: prometheus
# namespaceSelector:
# matchLabels:
# kubernetes.io/metadata.name: monitoring
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: metrics
ports: []
# - port: 8080
# protocol: TCP
# - port: 9090
# protocol: TCP
# - port: 10901
# protocol: TCP
- from:
- podSelector:
matchLabels:
app.kubernetes.io/name: grafana
namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: grafana
- podSelector:
matchLabels:
app.kubernetes.io/name: prometheus-adapter
namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: monitoring
ports:
- port: 9090
protocol: TCP
- from: []
ports: []
policyTypes:
- Egress
- Ingress
podSelector:
matchLabels:
{{ toYaml (get $.Values.nfc_monitoring .name ).labels | nindent 6 }}
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
---
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
labels:
{{ toYaml (get $.Values.nfc_monitoring .name ).labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: {{ .name | replace "_" "-" }}
namespace: {{ (get $.Values.nfc_monitoring .name ).namespace }}
spec:
- name: grafana
policy:
egress:
- to:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: alerting
podSelector:
matchLabels:
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
ports:
- port: 9093
protocol: TCP
- to:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: logging
podSelector:
matchLabels:
app.kubernetes.io/component: gateway
app.kubernetes.io/instance: loki
app.kubernetes.io/name: loki
ports:
- port: 80 # Service Port
protocol: TCP
- port: 8080 # Pod Port
protocol: TCP
- to:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: monitoring
podSelector:
matchLabels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: prometheus
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: metrics
podSelector:
matchLabels:
app.kubernetes.io/component: query-layer
app.kubernetes.io/instance: thanos-query
app.kubernetes.io/name: thanos-query
ports:
- port: 9090
protocol: TCP
- to: [] # Requires internet access for plugins and dashboard downloading
ports:
- port: 443
protocol: TCP
- to: # Kube DNS
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: kube-system
podSelector:
matchLabels:
k8s-app: kube-dns
ports:
- port: 53
protocol: TCP
- port: 53
protocol: UDP
ingress:
- from: []
ports:
- port: 3000
protocol: TCP
policyTypes:
- Egress
- Ingress
podSelector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 8 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
---
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
labels:
{{ toYaml (get $.Values.nfc_monitoring .name ).labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: {{ .name | replace "_" "-" }}
namespace: {{ (get $.Values.nfc_monitoring .name ).namespace }}
spec:
- name: grafana_agent
policy:
egress:
- to: # Logging
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: logging
podSelector:
matchLabels:
app.kubernetes.io/component: gateway
app.kubernetes.io/instance: loki
app.kubernetes.io/name: loki
ports:
- port: 80
protocol: TCP
- to: # Kube DNS
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: kube-system
podSelector:
matchLabels:
k8s-app: kube-dns
ports:
- port: 53
protocol: TCP
- port: 53
protocol: UDP
ingress:
- from:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: monitoring
podSelector:
matchLabels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
ports:
- port: 12345
protocol: TCP
policyTypes:
- Egress
- Ingress
podSelector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 8 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
{{ end }}

View File

@ -4,6 +4,7 @@ kind: PodDisruptionBudget
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -14,4 +15,5 @@ spec:
selector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}

View File

@ -4,6 +4,7 @@ kind: PodDisruptionBudget
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -14,4 +15,5 @@ spec:
selector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}

View File

@ -7,11 +7,12 @@ metadata:
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
spec:
minAvailable: 1
selector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}

View File

@ -4,10 +4,11 @@ kind: Prometheus
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: k8s
name: {{ $.Release.Name }}
namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}"
spec:
affinity:
@ -15,17 +16,18 @@ spec:
alerting:
alertmanagers:
- apiVersion: v2
name: alertmanager-main
name: alertmanager-{{ $.Release.Name }}
namespace: "{{ .Values.nfc_monitoring.alert_manager.namespace }}"
port: web
enableFeatures: []
externalLabels: {}
image: quay.io/prometheus/prometheus:v2.42.0
image: {{ .Values.nfc_monitoring.prometheus.image.name }}:{{ .Values.nfc_monitoring.prometheus.image.tag}}
nodeSelector:
kubernetes.io/os: linux
podMetadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -33,7 +35,11 @@ spec:
podMonitorSelector: {}
probeNamespaceSelector: {}
probeSelector: {}
replicas: 3
{{ if .Values.nfc_monitoring.prometheus.remotewrite }}
remoteWrite: {{ .Values.nfc_monitoring.prometheus.remotewrite | toYaml | nindent 4 }}
{{- end }}
replicas: {{ $.Values.nfc_monitoring.prometheus.replicas }}
resources:
requests:
memory: 400Mi
@ -42,18 +48,20 @@ spec:
fsGroup: 2000
runAsNonRoot: true
runAsUser: 1000
serviceAccountName: prometheus-k8s
serviceAccountName: prometheus-{{ $.Release.Name }}
serviceMonitorNamespaceSelector: {}
serviceMonitorSelector: {}
storage:
{{- toYaml .Values.nfc_monitoring.prometheus.storage | nindent 4 }}
{{ if .Values.nfc_monitoring.thanos.sidecar.enabled }}
{{ if .Values.nfc_monitoring.thanos.sidecar.config }}
thanos:
image: "{{ .Values.nfc_monitoring.thanos.image.name }}:{{ .Values.nfc_monitoring.thanos.image.tag }}"
objectStorageConfig:
key: thanos.yaml
name: thanos-sidecar-config
{{ end }}
{{ end }}
version: 2.42.0
{{ if .Values.nfc_monitoring.prometheus.additional }}
{{ toYaml .Values.nfc_monitoring.prometheus.additional | nindent 2 }}

View File

@ -4,10 +4,11 @@ kind: PrometheusRule
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
prometheus: k8s
prometheus: {{ $.Release.Name }}
role: alert-rules
name: alertmanager-main-rules
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
@ -23,7 +24,7 @@ spec:
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"}[5m]) == 0
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[5m]) == 0
for: 10m
labels:
severity: critical
@ -35,9 +36,9 @@ spec:
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m])
max_over_time(alertmanager_cluster_members{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[5m])
< on (namespace,service) group_left
count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m]))
count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[5m]))
for: 15m
labels:
severity: critical
@ -48,9 +49,9 @@ spec:
summary: An Alertmanager instance failed to send notifications.
expr: |
(
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
rate(alertmanager_notifications_failed_total{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[5m])
/
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
rate(alertmanager_notifications_total{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[5m])
)
> 0.01
for: 5m
@ -63,9 +64,9 @@ spec:
summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
expr: |
min by (namespace,service, integration) (
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
rate(alertmanager_notifications_failed_total{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring", integration=~`.*`}[5m])
/
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
rate(alertmanager_notifications_total{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring", integration=~`.*`}[5m])
)
> 0.01
for: 5m
@ -78,9 +79,9 @@ spec:
summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
expr: |
min by (namespace,service, integration) (
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
rate(alertmanager_notifications_failed_total{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring", integration!~`.*`}[5m])
/
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
rate(alertmanager_notifications_total{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring", integration!~`.*`}[5m])
)
> 0.01
for: 5m
@ -93,7 +94,7 @@ spec:
summary: Alertmanager instances within the same cluster have different configurations.
expr: |
count by (namespace,service) (
count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})
count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"})
)
!= 1
for: 20m
@ -107,11 +108,11 @@ spec:
expr: |
(
count by (namespace,service) (
avg_over_time(up{job="alertmanager-main",namespace="monitoring"}[5m]) < 0.5
avg_over_time(up{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[5m]) < 0.5
)
/
count by (namespace,service) (
up{job="alertmanager-main",namespace="monitoring"}
up{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}
)
)
>= 0.5
@ -126,11 +127,11 @@ spec:
expr: |
(
count by (namespace,service) (
changes(process_start_time_seconds{job="alertmanager-main",namespace="monitoring"}[10m]) > 4
changes(process_start_time_seconds{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}[10m]) > 4
)
/
count by (namespace,service) (
up{job="alertmanager-main",namespace="monitoring"}
up{job="alertmanager-{{ $.Release.Name }}",namespace="monitoring"}
)
)
>= 0.5

View File

@ -0,0 +1,48 @@
---
# PrometheusRule `common`: pipeline-level alerts that are not tied to a
# single component.
#   - Watchdog: expression `vector(1)`, so it is always firing.
#   - InfoInhibitor: fires while a severity="info" alert exists without a
#     firing warning/critical alert in the same namespace.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app.kubernetes.io/component: exporter
    app.kubernetes.io/instance: {{ $.Release.Name }}
    app.kubernetes.io/name: kube-prometheus
    app.kubernetes.io/part-of: {{ $.Chart.Name }}
    app.kubernetes.io/version: {{ $.Chart.Version }}
    app.kubernetes.io/managed-by: {{ $.Release.Service }}
    prometheus: {{ $.Release.Name }}
    role: alert-rules
  name: common
  namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
spec:
  groups:
    - name: common.rules
      rules:
        - alert: Watchdog
          annotations:
            description: |
              This is an alert meant to ensure that the entire alerting pipeline is functional.
              This alert is always firing, therefore it should always be firing in Alertmanager
              and always fire against a receiver. There are integrations with various notification
              mechanisms that send a notification when this alert is not firing. For example the
              "DeadMansSnitch" integration in PagerDuty.
            runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
            summary: An alert that should always be firing to certify that Alertmanager is working properly.
          expr: vector(1)
          labels:
            severity: none
        - alert: InfoInhibitor
          annotations:
            description: |
              This is an alert that is used to inhibit info alerts.
              By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
              other alerts.
              This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
              severity of 'warning' or 'critical' starts firing on the same namespace.
              This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
            runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
            summary: Info-level alert inhibition.
          expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
          labels:
            severity: none

View File

@ -4,10 +4,11 @@ kind: PrometheusRule
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
prometheus: k8s
prometheus: {{ $.Release.Name }}
role: alert-rules
name: grafana-agent
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}

View File

@ -1,13 +1,15 @@
{{ if .Values.nfc_monitoring.grafana.enabled -}}
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
prometheus: k8s
prometheus: {{ $.Release.Name }}
role: alert-rules
name: grafana-rules
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
@ -32,3 +34,5 @@ spec:
- expr: |
sum by (namespace, job, handler, status_code) (rate(grafana_http_request_duration_seconds_count[5m]))
record: namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m
{{- end }}

View File

@ -1,14 +1,16 @@
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-prometheus
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
prometheus: k8s
prometheus: {{ $.Release.Name }}
role: alert-rules
name: kube-prometheus-rules
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
@ -25,33 +27,6 @@ spec:
for: 10m
labels:
severity: warning
- alert: Watchdog
annotations:
description: |
This is an alert meant to ensure that the entire alerting pipeline is functional.
This alert is always firing, therefore it should always be firing in Alertmanager
and always fire against a receiver. There are integrations with various notification
mechanisms that send a notification when this alert is not firing. For example the
"DeadMansSnitch" integration in PagerDuty.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
summary: An alert that should always be firing to certify that Alertmanager is working properly.
expr: vector(1)
labels:
severity: none
- alert: InfoInhibitor
annotations:
description: |
This is an alert that is used to inhibit info alerts.
By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
other alerts.
This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
severity of 'warning' or 'critical' starts firing on the same namespace.
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
summary: Info-level alert inhibition.
expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
labels:
severity: none
- name: node-network
rules:
- alert: NodeNetworkInterfaceFlapping
@ -84,3 +59,5 @@ spec:
record: count:up1
- expr: count without(instance, pod, node) (up == 0)
record: count:up0
{{ end }}

View File

@ -1,14 +1,16 @@
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
prometheus: k8s
prometheus: {{ $.Release.Name }}
role: alert-rules
name: kube-state-metrics-rules
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
@ -65,3 +67,5 @@ spec:
for: 15m
labels:
severity: critical
{{ end }}

View File

@ -1,13 +1,15 @@
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-prometheus
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
prometheus: k8s
prometheus: {{ $.Release.Name }}
role: alert-rules
name: kubernetes-monitoring-rules
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
@ -1439,3 +1441,5 @@ spec:
labels:
quantile: "0.5"
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
{{ end }}

View File

@ -1,15 +1,17 @@
---
{{- if .Values.nfc_monitoring.loki.enabled | default false -}}
{{ if .Values.nfc_monitoring.loki.config }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: logging
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: loki
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
prometheus: k8s
prometheus: {{ $.Release.Name }}
role: alert-rules
name: loki
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
@ -109,4 +111,5 @@ spec:
labels:
severity: warning
{{ end }}
{{- end -}}

View File

@ -4,11 +4,12 @@ kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
prometheus: k8s
prometheus: {{ $.Release.Name }}
role: alert-rules
name: node-exporter-rules
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
@ -16,6 +17,16 @@ spec:
groups:
- name: node-exporter
rules:
{{ range $index, $node := (lookup "v1" "Node" "" "").items }}
- alert: NodeExporterJobMissing-{{ $node.metadata.name }}
annotations:
summary: Node Exporter job missing for node {{ $node.metadata.name }}. (instance {{ `{{` }} $labels.instance }})
description: "Node Exporter job has disappeared\n Node = {{ $node.metadata.name }}\n Value = {{ `{{` }} $value }}\n LABELS = {{ `{{` }} $labels }}"
expr: absent(up{job="node-exporter", node="{{ $node.metadata.name }}"})
for: 0m
labels:
severity: critical
{{ end }}
- alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{ `{{` }} $labels.device }} at {{ `{{` }} $labels.instance }} has only {{ `{{` }} printf "%.2f" $value }}% available space left and is filling up.

View File

@ -4,12 +4,13 @@ kind: PrometheusRule
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
prometheus: k8s
prometheus: {{ $.Release.Name }}
role: alert-rules
name: prometheus-k8s-prometheus-rules
name: prometheus-{{ $.Release.Name }}-prometheus-rules
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
spec:
groups:
@ -23,7 +24,7 @@ spec:
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"}[5m]) == 0
max_over_time(prometheus_config_last_reload_successful{job="prometheus",namespace="monitoring"}[5m]) == 0
for: 10m
labels:
severity: critical
@ -36,9 +37,9 @@ spec:
# Without min_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30)
predict_linear(prometheus_notifications_queue_length{job="prometheus",namespace="monitoring"}[5m], 60 * 30)
>
min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"}[5m])
min_over_time(prometheus_notifications_queue_capacity{job="prometheus",namespace="monitoring"}[5m])
)
for: 15m
labels:
@ -50,9 +51,9 @@ spec:
summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
expr: |
(
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
rate(prometheus_notifications_errors_total{job="prometheus",namespace="monitoring"}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
rate(prometheus_notifications_sent_total{job="prometheus",namespace="monitoring"}[5m])
)
* 100
> 1
@ -67,7 +68,7 @@ spec:
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"}[5m]) < 1
max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus",namespace="monitoring"}[5m]) < 1
for: 10m
labels:
severity: warning
@ -77,7 +78,7 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing
summary: Prometheus has issues reloading blocks from disk.
expr: |
increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
increase(prometheus_tsdb_reloads_failures_total{job="prometheus",namespace="monitoring"}[3h]) > 0
for: 4h
labels:
severity: warning
@ -87,7 +88,7 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing
summary: Prometheus has issues compacting blocks.
expr: |
increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
increase(prometheus_tsdb_compactions_failed_total{job="prometheus",namespace="monitoring"}[3h]) > 0
for: 4h
labels:
severity: warning
@ -98,12 +99,12 @@ spec:
summary: Prometheus is not ingesting samples.
expr: |
(
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus",namespace="monitoring"}[5m]) <= 0
and
(
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="monitoring"}) > 0
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus",namespace="monitoring"}) > 0
or
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="monitoring"}) > 0
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus",namespace="monitoring"}) > 0
)
)
for: 10m
@ -115,7 +116,7 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps
summary: Prometheus is dropping samples with duplicate timestamps.
expr: |
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus",namespace="monitoring"}[5m]) > 0
for: 10m
labels:
severity: warning
@ -125,7 +126,7 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps
summary: Prometheus drops samples with out-of-order timestamps.
expr: |
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus",namespace="monitoring"}[5m]) > 0
for: 10m
labels:
severity: warning
@ -136,12 +137,12 @@ spec:
summary: Prometheus fails to send samples to remote storage.
expr: |
(
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus",namespace="monitoring"}[5m]))
/
(
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus",namespace="monitoring"}[5m]))
+
(rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
(rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus",namespace="monitoring"}[5m]))
)
)
* 100
@ -158,9 +159,9 @@ spec:
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus",namespace="monitoring"}[5m])
- ignoring(remote_name, url) group_right
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus",namespace="monitoring"}[5m])
)
> 120
for: 15m
@ -168,16 +169,16 @@ spec:
severity: critical
- alert: PrometheusRemoteWriteDesiredShards
annotations:
description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} remote write desired shards calculation wants to run {{ `{{` }} $value }} shards for queue {{ `{{` }} $labels.remote_name}}:{{ `{{` }} $labels.url }}, which is more than the max of {{ `{{` }} printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}.
description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} remote write desired shards calculation wants to run {{ `{{` }} $value }} shards for queue {{ `{{` }} $labels.remote_name}}:{{ `{{` }} $labels.url }}, which is more than the max of {{ `{{` }} printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus",namespace="monitoring"}` $labels.instance | query | first | value }}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards
summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m])
max_over_time(prometheus_remote_storage_shards_desired{job="prometheus",namespace="monitoring"}[5m])
>
max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m])
max_over_time(prometheus_remote_storage_shards_max{job="prometheus",namespace="monitoring"}[5m])
)
for: 15m
labels:
@ -188,7 +189,7 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures
summary: Prometheus is failing rule evaluations.
expr: |
increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
increase(prometheus_rule_evaluation_failures_total{job="prometheus",namespace="monitoring"}[5m]) > 0
for: 15m
labels:
severity: critical
@ -198,7 +199,7 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: |
increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
increase(prometheus_rule_group_iterations_missed_total{job="prometheus",namespace="monitoring"}[5m]) > 0
for: 15m
labels:
severity: warning
@ -208,7 +209,7 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit
summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
expr: |
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus",namespace="monitoring"}[5m]) > 0
for: 15m
labels:
severity: warning
@ -218,7 +219,7 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit
summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit.
expr: |
increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus",namespace="monitoring"}[5m]) > 0
for: 15m
labels:
severity: warning
@ -228,7 +229,7 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit
summary: Prometheus has dropped some targets that exceeded body size limit.
expr: |
increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus",namespace="monitoring"}[5m]) > 0
for: 15m
labels:
severity: warning
@ -238,7 +239,7 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit
summary: Prometheus has failed scrapes that have exceeded the configured sample limit.
expr: |
increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus",namespace="monitoring"}[5m]) > 0
for: 15m
labels:
severity: warning
@ -248,7 +249,7 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure
summary: Prometheus has failed to sync targets.
expr: |
increase(prometheus_target_sync_failed_total{job="prometheus-k8s",namespace="monitoring"}[30m]) > 0
increase(prometheus_target_sync_failed_total{job="prometheus",namespace="monitoring"}[30m]) > 0
for: 5m
labels:
severity: critical
@ -258,7 +259,7 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheushighqueryload
summary: Prometheus is reaching its maximum capacity serving concurrent requests.
expr: |
avg_over_time(prometheus_engine_queries{job="prometheus-k8s",namespace="monitoring"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.8
avg_over_time(prometheus_engine_queries{job="prometheus",namespace="monitoring"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus",namespace="monitoring"}[5m]) > 0.8
for: 15m
labels:
severity: warning
@ -269,9 +270,9 @@ spec:
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
min without (alertmanager) (
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
rate(prometheus_notifications_errors_total{job="prometheus",namespace="monitoring",alertmanager!~``}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
rate(prometheus_notifications_sent_total{job="prometheus",namespace="monitoring",alertmanager!~``}[5m])
)
* 100
> 3

View File

@ -1,15 +1,17 @@
{{ if .Values.nfc_monitoring.thanos.sidecar.enabled }}
{{ if .Values.nfc_monitoring.thanos.sidecar.config }}
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: metrics
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: thanos
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
prometheus: k8s
prometheus: {{ $.Release.Name }}
role: alert-rules
name: thanos-sidecar-rules
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
@ -52,4 +54,5 @@ spec:
labels:
severity: critical
{{ end }}
{{ end }}
{{ end }}

View File

@ -2,17 +2,18 @@
apiVersion: rbac.authorization.k8s.io/v1
items:
{{ range .Values.nfc_monitoring.prometheus.monitor_namespaces }}
{{ range $index, $namespace := (lookup "v1" "Namespace" "" "").items }}
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: prometheus-k8s
namespace: {{ . | quote }}
name: prometheus-{{ $.Release.Name }}
namespace: {{ $namespace.metadata.name | quote }}
rules:
- apiGroups:
- ""

View File

@ -4,6 +4,7 @@ kind: RoleBinding
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -12,8 +13,8 @@ metadata:
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s-config
name: prometheus-{{ $.Release.Name }}-config
subjects:
- kind: ServiceAccount
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}

View File

@ -1,24 +1,25 @@
---
apiVersion: rbac.authorization.k8s.io/v1
items:
{{ range .Values.nfc_monitoring.prometheus.monitor_namespaces }}
{{ range $index, $namespace := (lookup "v1" "Namespace" "" "").items }}
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: prometheus-k8s
namespace: {{ . | quote }}
name: prometheus-{{ $.Release.Name }}
namespace: {{ $namespace.metadata.name | quote }}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
subjects:
- kind: ServiceAccount
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
namespace: {{ $.Values.nfc_monitoring.prometheus.namespace }}
{{ end }}

View File

@ -4,6 +4,7 @@ kind: RoleBinding
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/version: 0.11.1
name: resource-metrics-auth-reader
namespace: kube-system

View File

@ -4,10 +4,11 @@ kind: Role
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: prometheus-k8s-config
name: prometheus-{{ $.Release.Name }}-config
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
rules:
- apiGroups:

View File

@ -4,6 +4,7 @@ kind: Secret
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -1,10 +1,12 @@
{{ if .Values.nfc_monitoring.thanos.sidecar.enabled }}
{{ if .Values.nfc_monitoring.thanos.sidecar.config }}
---
apiVersion: v1
kind: Secret
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -16,3 +18,4 @@ stringData:
{{ toYaml .Values.nfc_monitoring.thanos.sidecar.config | nindent 4 }}
{{ end }}
{{ end }}

View File

@ -5,7 +5,7 @@ apiVersion: v1
kind: Service
metadata:
name: calico-metrics
namespace: kube-system
namespace: calico-system
labels:
k8s-app: calico-node
spec:

View File

@ -1,5 +1,5 @@
{{ if .Values.nfc_monitoring.grafana.enabled -}}
---
apiVersion: v1
kind: Service
metadata:
@ -7,12 +7,14 @@ metadata:
namespace: {{ .Values.nfc_monitoring.grafana.namespace }}
labels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
spec:
selector:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
#type: NodePort
#type: LoadBalancer
@ -24,3 +26,5 @@ spec:
#nodePort: 3000
#type: LoadBalancer
sessionAffinity: ClientIP
{{- end }}

View File

@ -6,12 +6,14 @@ metadata:
namespace: monitoring
labels:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
spec:
selector:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
ports:
- name: grafana-metrics

View File

@ -4,10 +4,11 @@ kind: Service
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: alertmanager-main
name: alertmanager-{{ $.Release.Name }}
namespace: {{ .Values.nfc_monitoring.alert_manager.namespace | quote }}
spec:
ports:
@ -19,7 +20,7 @@ spec:
targetPort: reloader-web
selector:
app.kubernetes.io/component: alert-router
app.kubernetes.io/instance: main
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: alertmanager
app.kubernetes.io/part-of: {{ $.Chart.Name }}
sessionAffinity: ClientIP

View File

@ -1,3 +1,4 @@
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
---
apiVersion: v1
kind: Service
@ -6,7 +7,7 @@ metadata:
namespace: monitoring
labels:
app.kubernetes.io/component: proxy
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-monitor-proxy
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -16,7 +17,7 @@ metadata:
spec:
selector:
app.kubernetes.io/component: proxy
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-monitor-proxy
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -28,3 +29,5 @@ spec:
port: 10259
targetPort: kube-scheduler
sessionAffinity: ClientIP
{{ end }}

View File

@ -1,10 +1,11 @@
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
---
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -22,7 +23,8 @@ spec:
targetPort: https-self
selector:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: {{ $.Chart.Name }}
{{ end }}

View File

@ -4,6 +4,7 @@ kind: Service
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -16,4 +17,5 @@ spec:
targetPort: 6443
selector:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}

View File

@ -4,10 +4,11 @@ kind: Service
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
spec:
ports:
@ -19,6 +20,7 @@ spec:
targetPort: reloader-web
selector:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
sessionAffinity: ClientIP
@ -30,7 +32,7 @@ kind: Service
metadata:
labels:
app.kubernetes.io/component: prometheus-sidecar
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: thanos-sidecar
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
@ -48,5 +50,6 @@ spec:
targetPort: 10902
selector:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
{{ end }}

View File

@ -1,3 +1,4 @@
{{ if .Values.nfc_monitoring.grafana.enabled -}}
---
apiVersion: v1
automountServiceAccountToken: false
@ -7,6 +8,9 @@ metadata:
namespace: {{ .Values.nfc_monitoring.grafana.namespace }}
labels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
{{- end }}

View File

@ -7,6 +7,7 @@ metadata:
namespace: monitoring
labels:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -5,6 +5,7 @@ kind: ServiceAccount
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -1,3 +1,4 @@
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
---
apiVersion: v1
kind: ServiceAccount
@ -6,8 +7,10 @@ metadata:
namespace: monitoring
labels:
app.kubernetes.io/component: proxy
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-monitor-proxy
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
{{ end }}

View File

@ -1,3 +1,4 @@
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
---
apiVersion: v1
automountServiceAccountToken: false
@ -5,10 +6,12 @@ kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: kube-state-metrics
namespace: monitoring
{{ end }}

View File

@ -5,6 +5,7 @@ kind: ServiceAccount
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}

View File

@ -5,8 +5,9 @@ kind: ServiceAccount
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}

View File

@ -1,8 +1,10 @@
{{ if .Values.nfc_monitoring.prometheus.service_monitor.apiserver }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: apiserver
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -76,3 +78,5 @@ spec:
matchLabels:
component: apiserver
provider: kubernetes
{{ end }}

View File

@ -1,8 +1,10 @@
{{ if .Values.nfc_monitoring.prometheus.service_monitor.cadvisor }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: cadvisor
app.kubernetes.io/component: exporter
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -50,3 +52,5 @@ spec:
selector:
matchLabels:
app.kubernetes.io/name: kubelet
{{ end }}

View File

@ -1,10 +1,12 @@
---
{{- if eq .Values.nfc_monitoring.kubernetes.networking "calico" -}}
{{ if .Values.nfc_monitoring.prometheus.service_monitor.calico }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: calico
app.kubernetes.io/component: networking
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -12,7 +14,7 @@ metadata:
app.kubernetes.io/managed-by: {{ $.Release.Service }}
k8s-app: calico-node
name: calico
namespace: kube-system
namespace: calico-system
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
@ -34,3 +36,4 @@ spec:
k8s-app: calico-node
{{- end -}}
{{ end }}

View File

@ -1,8 +1,10 @@
{{ if .Values.nfc_monitoring.prometheus.service_monitor.coredns }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: coredns
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -26,3 +28,5 @@ spec:
selector:
matchLabels:
k8s-app: kube-dns
{{ end }}

View File

@ -1,9 +1,11 @@
{{ if .Values.nfc_monitoring.grafana.enabled -}}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -31,3 +33,7 @@ spec:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.grafana.labels | nindent 6 }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/instance: {{ $.Release.Name }}
{{- end }}

View File

@ -1,8 +1,10 @@
{{ if .Values.nfc_monitoring.prometheus.service_monitor.kubelet }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kubelet
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -85,3 +87,6 @@ spec:
selector:
matchLabels:
app.kubernetes.io/name: kubelet
app.kubernetes.io/instance: {{ $.Release.Name }}
{{ end }}

View File

@ -1,9 +1,11 @@
{{ if .Values.nfc_monitoring.grafana_agent.enabled }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: node
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -74,4 +76,7 @@ spec:
selector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
{{ end }}

View File

@ -7,6 +7,7 @@ metadata:
labels:
app.kubernetes.io/name: ceph
app.kubernetes.io/component: storage
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}

View File

@ -1,10 +1,12 @@
{{ if .Values.nfc_monitoring.prometheus.service_monitor.kube_controller_manager }}
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: proxy
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-controller-manager
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -77,6 +79,9 @@ spec:
insecureSkipVerify: true
selector:
matchLabels:
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-monitor-proxy
app.kubernetes.io/part-of: {{ $.Chart.Name }}
{{ end }}
{{ end }}

View File

@ -1,10 +1,12 @@
{{ if .Values.nfc_monitoring.prometheus.service_monitor.kube_scheduler }}
{{ if .Values.nfc_monitoring.kube_monitor_proxy.enabled }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: proxy
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-scheduler
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -31,6 +33,9 @@ spec:
insecureSkipVerify: true
selector:
matchLabels:
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-monitor-proxy
app.kubernetes.io/part-of: {{ $.Chart.Name }}
{{ end }}
{{ end }}

View File

@ -1,10 +1,11 @@
{{ if .Values.nfc_monitoring.kube_state_metrics.enabled }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
@ -41,6 +42,8 @@ spec:
selector:
matchLabels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/part-of: {{ $.Chart.Name }}
{{ end }}

View File

@ -1,9 +1,11 @@
{{ if .Values.nfc_monitoring.grafana_agent.enabled }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: node-exporter
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -39,4 +41,7 @@ spec:
selector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.grafana_agent.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
{{ end }}

View File

@ -1,9 +1,11 @@
{{ if .Values.nfc_monitoring.prometheus_adaptor.enabled }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -32,4 +34,7 @@ spec:
selector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.prometheus_adaptor.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
{{ end }}

View File

@ -1,13 +1,15 @@
{{ if .Values.nfc_monitoring.prometheus.enabled }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
name: prometheus-k8s
name: prometheus-{{ $.Release.Name }}
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}
spec:
endpoints:
@ -20,21 +22,26 @@ spec:
sourceLabels:
- __meta_kubernetes_pod_name
targetLabel: instance
- targetLabel: "job"
replacement: "prometheus"
- interval: 30s
port: reloader-web
selector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.prometheus.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
{{ if .Values.nfc_monitoring.thanos.sidecar.enabled }}
{{ if .Values.nfc_monitoring.thanos.sidecar.config }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: prometheus-sidecar
app.kubernetes.io/instance: thanos-sidecar
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: thanos-sidecar
app.kubernetes.io/version: v0.30.2
name: thanos-sidecar
@ -52,8 +59,10 @@ spec:
selector:
matchLabels:
app.kubernetes.io/component: prometheus-sidecar
app.kubernetes.io/instance: k8s
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: thanos-sidecar
app.kubernetes.io/part-of: {{ $.Chart.Name }}
{{ end }}
{{ end }}
{{ end }}
{{ end }}

View File

@ -6,11 +6,12 @@ kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: storage
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/name: ceph
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
prometheus: k8s
prometheus: {{ $.Release.Name }}
role: alert-rules
name: ceph-rules
namespace: {{ .Values.nfc_monitoring.prometheus.namespace }}

View File

@ -1,9 +1,11 @@
{{ if .Values.nfc_monitoring.alert_manager.enabled -}}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 4 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
app.kubernetes.io/version: {{ $.Chart.Version }}
@ -14,12 +16,30 @@ spec:
endpoints:
- interval: 30s
port: web
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_name
targetLabel: instance
- interval: 30s
port: reloader-web
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_name
targetLabel: instance
namespaceSelector:
matchNames:
- "{{ .Values.nfc_monitoring.alert_manager.namespace }}"
selector:
matchLabels:
{{ toYaml $.Values.nfc_monitoring.alert_manager.labels | nindent 6 }}
app.kubernetes.io/instance: {{ $.Release.Name }}
app.kubernetes.io/part-of: {{ $.Chart.Name }}
{{ end }}

View File

@ -12,35 +12,27 @@ nfc_monitoring:
alert_manager:
enabled: true
image:
name: quay.io/prometheus/alertmanager
tag: 'v0.26.0'
# How many replicas to deploy
replicas: 1
ingress:
annotations:
cert-manager.io/cluster-issuer: "selfsigned-issuer"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
# enabled: false # Optional, boolean.
spec:
tls:
- hosts:
- alert-manager.local
secretName: certificate-tls-alert-manager
rules:
- host: alert-manager.local
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: alertmanager-main
port:
name: web
enabled: false
hostname: alert-manager.local
labels:
app.kubernetes.io/instance: main
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
@ -49,6 +41,11 @@ nfc_monitoring:
grafana:
dashboards:
cert_manager: false
enabled: false
# Grafana Configuration
# Type: Dict
# See: https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana
@ -72,33 +69,20 @@ nfc_monitoring:
image:
name: grafana/grafana
tag: '10.1.2' # '10.0.5'
tag: '10.3.1' # '10.0.5'
ingress:
annotations:
cert-manager.io/cluster-issuer: "selfsigned-issuer"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
# enabled: false # Optional, boolean.
spec:
tls:
- hosts:
- grafana.local
secretName: certificate-tls-grafana
rules:
- host: grafana.local
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: grafana
port:
name: grafana-http
enabled: true
hostname: grafana.local
labels:
app.kubernetes.io/component: graphing
app.kubernetes.io/instance: k8s
app.kubernetes.io/name: grafana
namespace: grafana
@ -195,12 +179,14 @@ nfc_monitoring:
grafana_agent:
enabled: true
image:
name: grafana/agent
tag: 'v0.36.1'
tag: 'v0.39.2'
labels:
app.kubernetes.io/instance: k8s
app.kubernetes.io/component: exporter
app.kubernetes.io/name: grafana-agent
@ -217,9 +203,11 @@ nfc_monitoring:
namespace: logging
# If no config is set up, logging will not be enabled.
config: {}
# service name and port are used for the connection to your loki instance
service_name: loki-gateway
service_port: 80
# service_name: loki-gateway
# service_port: 80
ServiceMonitor:
selector:
@ -229,10 +217,12 @@ nfc_monitoring:
kube_monitor_proxy:
enabled: false
namespace: monitoring
kube_rbac_proxy:
# This image is used as part of kube-monitor-proxy.
image:
name: quay.io/brancz/kube-rbac-proxy
@ -240,6 +230,8 @@ nfc_monitoring:
kube_state_metrics:
enabled: false
image:
name: registry.k8s.io/kube-state-metrics/kube-state-metrics
tag: 'v2.8.1'
@ -250,34 +242,42 @@ nfc_monitoring:
image:
name: prom/prometheus
tag: 'v2.47.0'
tag: 'v2.49.0'
# How many replicas to deploy
replicas: 1
# alertmanagers:
# - name:
# Configure Prometheus to write metrics to a remote host.
# The example config below uses a secret named "prometheus-remote-write" with two keys: username and password.
# Documentation: https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.RemoteWriteSpec
remotewrite: {}
# url:
# name:
# remoteTimeout: 30
# writeRelabelConfigs:
# basicAuth:
# username:
# name: prometheus-remote-write
# key: username
# password:
# name: prometheus-remote-write
# key: password
ingress:
annotations:
cert-manager.io/cluster-issuer: "selfsigned-issuer"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
# enabled: false # Optional, boolean.
spec:
tls:
- hosts:
- prometheus.local
secretName: certificate-tls-prometheus
rules:
- host: prometheus.local
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: prometheus-k8s
port:
name: web
enabled: true
hostname: prometheus.local
# These labels are appended to all Prometheus items and are also the selector labels
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/instance: k8s
app.kubernetes.io/name: prometheus
namespace: monitoring
@ -307,28 +307,11 @@ nfc_monitoring:
topologyKey: kubernetes.io/hostname
weight: 10
# List of namespaces that prometheus is to monitor
# used to create Roles and RoleBindings
# type: list
monitor_namespaces:
- alerting
- default
# - ceph
- grafana
- monitoring
# - kube-dashboard
# - kube-metrics
- kube-policy
- kube-system
- logging
# - mariadb
# - olm
# - operators
# Deploy a generate policy for kyverno to create Role and RoleBindings
# for the prometheus service account so it can monitor
# new/existing namespaces
kyverno_role_policy: true
kyverno_role_policy: false
storage:
volumeClaimTemplate:
@ -344,26 +327,36 @@ nfc_monitoring:
# Type: dict
additional:
# Don't declare remoteWrite here, as it's done at path .prometheus.remote_write
# remoteWrite:
# - name: mimir
# url: http://mimir-gateway.metrics.svc.cluster.local/api/v1/push
retention: 24h
retentionSize: 20GB
retentionSize: 2GB
ruleSelector:
matchLabels:
role: alert-rules
service_monitor:
apiserver: false
cadvisor: false
calico: false
ceph: false
coredns: false
kube_controller_manager: false
kubelet: false
kube_scheduler: false
prometheus_adaptor:
# Toggle for the Prometheus adaptor; the chart templates gate on
# `.Values.nfc_monitoring.prometheus_adaptor.enabled`.
enabled: false
image:
name: registry.k8s.io/prometheus-adapter/prometheus-adapter
tag: 'v0.11.1'
labels:
app.kubernetes.io/component: metrics-adapter
app.kubernetes.io/instance: main
app.kubernetes.io/name: prometheus-adapter
namespace: monitoring
@ -404,22 +397,23 @@ nfc_monitoring:
sidecar:
enabled: true
config:
type: S3
config:
bucket: "thanos-metrics"
endpoint: "rook-ceph-rgw-earth.ceph.svc:80"
access_key: "7J5NM2MNCDB4T4Y9OKJ5"
secret_key: "t9r69RzZdWEBL3NCKiUIpDk6j5625xc6HucusiGG"
insecure: true
# Config must be specified for the sidecar to deploy
config: {}
# type: S3
# config:
# bucket: "thanos-metrics"
# endpoint: "rook-ceph-rgw-earth.ceph.svc:80"
# access_key: "<your-access-key>"
# secret_key: "<your-secret-key>"
# insecure: true
additions:
ceph:
enabled: true
enabled: false
namespace: ceph
@ -434,7 +428,7 @@ nfc_monitoring:
# Add sidecar to grafana pod to load dashboards from configMap
dashboard_sidecar:
enabled: true
enabled: false
image:
name: ghcr.io/kiwigrid/k8s-sidecar
@ -446,278 +440,7 @@ nfc_monitoring:
network_policy:
enabled: true
# Network Policies to apply. These policies are automagically built using the values below.
# What you would find under path root.spec belongs here.
#
# Do:
# - Define 'Ingress'
# - Define 'Egress'
# - Ensure that the name matches the item name from values.yaml. i.e. nfc_monitoring.{item_name}
# for prometheus the item name is 'prometheus'. This value is used to select items pertaining to
# that item from values.yaml. for example the labels and namespace.
# Don't:
# - Define 'podSelector', as this is already included using the selector labels
policies:
### SoF Network Policy: Prometheus ###
- name: prometheus
policy:
egress: # ToDo: add further restrictions to egress. is variable lookup possible to obtain values????
# - {}
- to: # Alert Manager
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: alerting
podSelector:
matchLabels:
app.kubernetes.io/instance: main
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
ports:
- port: 9093
protocol: TCP
- to: # Ceph
- ipBlock:
cidr: 172.16.10.0/24
ports:
- port: 9283
protocol: TCP
- to: # Grafana
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: grafana
podSelector:
matchLabels:
app.kubernetes.io/component: graphing
app.kubernetes.io/instance: k8s
app.kubernetes.io/name: grafana
ports:
- port: 3000
protocol: TCP
- to: # Grafana Agent
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: monitoring
podSelector:
matchLabels:
app.kubernetes.io/instance: k8s
app.kubernetes.io/component: exporter
app.kubernetes.io/name: grafana-agent
ports:
- port: 12345
protocol: TCP
- to: # Kube DNS
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: kube-system
podSelector:
matchLabels:
k8s-app: kube-dns
ports:
- port: 53
protocol: TCP
- port: 53
protocol: UDP
- to:
- podSelector:
matchLabels:
app.kubernetes.io/name: prometheus
# namespaceSelector:
# matchLabels:
# kubernetes.io/metadata.name: monitoring
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: metrics
ports: []
- {} # ToDo: Temp rule: Allow All. this rule MUST be removed when egress has been refactored
ingress:
- from:
- podSelector:
matchLabels:
app.kubernetes.io/name: prometheus
# namespaceSelector:
# matchLabels:
# kubernetes.io/metadata.name: monitoring
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: metrics
ports: []
# - port: 8080
# protocol: TCP
# - port: 9090
# protocol: TCP
# - port: 10901
# protocol: TCP
- from:
- podSelector:
matchLabels:
app.kubernetes.io/name: grafana
namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: grafana
- podSelector:
matchLabels:
app.kubernetes.io/name: prometheus-adapter
namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: monitoring
ports:
- port: 9090
protocol: TCP
- from: []
ports: []
policyTypes:
- Egress
- Ingress
### SoF Network Policy: Grafana ###
- name: grafana
policy:
egress:
- to:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: alerting
podSelector:
matchLabels:
app.kubernetes.io/instance: main
app.kubernetes.io/component: alert-router
app.kubernetes.io/name: alertmanager
ports:
- port: 9093
protocol: TCP
- to:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: logging
podSelector:
matchLabels:
app.kubernetes.io/component: gateway
app.kubernetes.io/instance: loki
app.kubernetes.io/name: loki
ports:
- port: 80 # Service Port
protocol: TCP
- port: 8080 # Pod Port
protocol: TCP
- to:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: monitoring
podSelector:
matchLabels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/instance: k8s
app.kubernetes.io/name: prometheus
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: metrics
podSelector:
matchLabels:
app.kubernetes.io/component: query-layer
app.kubernetes.io/instance: thanos-query
app.kubernetes.io/name: thanos-query
ports:
- port: 9090
protocol: TCP
- to: [] # Requires internet access for plugins and dashboard downloading
ports:
- port: 443
protocol: TCP
- to: # Kube DNS
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: kube-system
podSelector:
matchLabels:
k8s-app: kube-dns
ports:
- port: 53
protocol: TCP
- port: 53
protocol: UDP
ingress:
- from: []
ports:
- port: 3000
protocol: TCP
policyTypes:
- Egress
- Ingress
### SoF Network Policy: Grafana Agent ###
- name: grafana_agent
policy:
egress:
- to: # Logging
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: logging
podSelector:
matchLabels:
app.kubernetes.io/component: gateway
app.kubernetes.io/instance: loki
app.kubernetes.io/name: loki
ports:
- port: 80
protocol: TCP
- to: # Kube DNS
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: kube-system
podSelector:
matchLabels:
k8s-app: kube-dns
ports:
- port: 53
protocol: TCP
- port: 53
protocol: UDP
ingress:
- from:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: monitoring
podSelector:
matchLabels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/instance: k8s
app.kubernetes.io/name: prometheus
ports:
- port: 12345
protocol: TCP
policyTypes:
- Egress
- Ingress
enabled: false
loki_instance: