Merge branch 'development' into 'master'
chore: initial release See merge request nofusscomputing/projects/kubernetes_monitoring!3
This commit is contained in:
		
							
								
								
									
										7
									
								
								.cz.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								.cz.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,7 @@ | |||||||
|  | commitizen: | ||||||
|  |   bump_message: "build(version): bump version $current_version \u2192 $new_version" | ||||||
|  |   changelog_incremental: false | ||||||
|  |   name: cz_conventional_commits | ||||||
|  |   tag_format: $major.$minor.$patch$prerelease | ||||||
|  |   update_changelog_on_bump: true | ||||||
|  |   version: 0.0.1 | ||||||
							
								
								
									
										35
									
								
								.gitlab-ci.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								.gitlab-ci.yml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,35 @@ | |||||||
|  | variables: | ||||||
|  |   MY_PROJECT_ID: "50510268" | ||||||
|  |   GIT_SYNC_URL: "https://$GITHUB_USERNAME_ROBOT:$GITHUB_TOKEN_ROBOT@github.com/NoFussComputing/kubernetes_monitoring.git" | ||||||
|  |    | ||||||
|  |  | ||||||
|  |   PAGES_ENVIRONMENT_PATH: projects/kubernetes_monitoring/ | ||||||
|  |  | ||||||
|  | include: | ||||||
|  |   - project: nofusscomputing/projects/gitlab-ci | ||||||
|  |     ref: development | ||||||
|  |     file: | ||||||
|  |       - template/automagic.gitlab-ci.yaml | ||||||
|  |   #- template: Jobs/Container-Scanning.gitlab-ci.yml # see https://gitlab.com/gitlab-org/gitlab/-/issues/381665 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | Website.Submodule.Deploy: | ||||||
|  |   extends: .submodule_update_trigger | ||||||
|  |   variables: | ||||||
|  |     SUBMODULE_UPDATE_TRIGGER_PROJECT: nofusscomputing/infrastructure/website | ||||||
|  |   environment: | ||||||
|  |     url: https://nofusscomputing.com/$PAGES_ENVIRONMENT_PATH | ||||||
|  |     name: Documentation | ||||||
|  |   rules: | ||||||
|  |     - if:  # condition_dev_branch_push | ||||||
|  |         $CI_COMMIT_BRANCH == "development" &&  | ||||||
|  |         $CI_PIPELINE_SOURCE == "push" | ||||||
|  |       exists: | ||||||
|  |         - '{docs/**,pages/**}/*.md' | ||||||
|  |       changes: | ||||||
|  |         paths: | ||||||
|  |           - '{docs/**,pages/**}/*.md' | ||||||
|  |         compare_to: 'master' | ||||||
|  |       when: always | ||||||
|  |  | ||||||
|  |     - when: never | ||||||
							
								
								
									
										8
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,8 @@ | |||||||
|  | [submodule "gitlab-ci"] | ||||||
|  | 	path = gitlab-ci | ||||||
|  | 	url = https://gitlab.com/nofusscomputing/projects/gitlab-ci.git | ||||||
|  | 	branch = development | ||||||
|  | [submodule "website-template"] | ||||||
|  | 	path = website-template | ||||||
|  | 	url = https://gitlab.com/nofusscomputing/infrastructure/website-template.git | ||||||
|  | 	branch = development | ||||||
							
								
								
									
										30
									
								
								.helmignore
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								.helmignore
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,30 @@ | |||||||
|  | # Patterns to ignore when building packages. | ||||||
|  | # This supports shell glob matching, relative path matching, and | ||||||
|  | # negation (prefixed with !). Only one pattern per line. | ||||||
|  | .DS_Store | ||||||
|  | # Common VCS dirs | ||||||
|  | .git/ | ||||||
|  | .gitignore | ||||||
|  | .bzr/ | ||||||
|  | .bzrignore | ||||||
|  | .hg/ | ||||||
|  | .hgignore | ||||||
|  | .svn/ | ||||||
|  | # Common backup files | ||||||
|  | *.swp | ||||||
|  | *.bak | ||||||
|  | *.tmp | ||||||
|  | *.orig | ||||||
|  | *~ | ||||||
|  | # Various IDEs | ||||||
|  | .project | ||||||
|  | .idea/ | ||||||
|  | *.tmproj | ||||||
|  | .vscode/ | ||||||
|  |  | ||||||
|  | # git repo files | ||||||
|  | monitoring-mixins/ | ||||||
|  | ceph-mixins/ | ||||||
|  | gitlab-ci/ | ||||||
|  | website-template/ | ||||||
|  | docs/ | ||||||
							
								
								
									
										26
									
								
								Chart.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								Chart.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,26 @@ | |||||||
|  | apiVersion: v2 | ||||||
|  | name: nfc-monitoring | ||||||
|  | description: | | ||||||
|  |   This helm chart installs the required components for full stack monitoring. | ||||||
|  |  | ||||||
|  |   The purpose of this chart is to provide all components required to monitor kubernetes within its entirety.  | ||||||
|  |   Storage of the monitoring data is not part of this chart nor will it be included. For this reason this chart can be added and removed without losing your monitoring data. | ||||||
|  |  | ||||||
|  |   For this chart to function correctly the following operators are required: | ||||||
|  |  | ||||||
|  |     - [grafana](https://grafana-operator.github.io/grafana-operator/docs/installation/helm/) | ||||||
|  |  | ||||||
|  |     - [prometheus](https://prometheus-operator.dev/docs/user-guides/getting-started/) | ||||||
|  |  | ||||||
|  |   Full Docs available at http://nofusscomputing.com/projects/kubernetes_monitoring. | ||||||
|  |  | ||||||
|  | type: application | ||||||
|  |  | ||||||
|  |  | ||||||
|  | version: 0.1.0 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | appVersion: "0.1.0" | ||||||
|  |  | ||||||
|  |  | ||||||
|  | dependencies: [] | ||||||
| @ -49,6 +49,10 @@ All contributions for this project must conducted from [Gitlab](https://gitlab.c | |||||||
| For further details on contributing please refer to the [contribution guide](CONTRIBUTING.md). | For further details on contributing please refer to the [contribution guide](CONTRIBUTING.md). | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## Documentation | ||||||
|  |  | ||||||
|  | Documentation for this project can be found at <http://nofusscomputing.com/projects/kubernetes_monitoring> | ||||||
|  |  | ||||||
| ## Other | ## Other | ||||||
|  |  | ||||||
| This repo is release under this [license](LICENSE) | This repo is release under this [license](LICENSE) | ||||||
|  | |||||||
							
								
								
									
										0
									
								
								docs/articles/index.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								docs/articles/index.md
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										0
									
								
								docs/contact.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								docs/contact.md
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										0
									
								
								docs/index.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								docs/index.md
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										0
									
								
								docs/operations/index.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								docs/operations/index.md
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										0
									
								
								docs/projects/index.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								docs/projects/index.md
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										199
									
								
								docs/projects/kubernetes_monitoring/index.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										199
									
								
								docs/projects/kubernetes_monitoring/index.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,199 @@ | |||||||
|  | --- | ||||||
|  | title: Kubernetes Monitoring Helm Chart | ||||||
|  | description: How to use No Fuss Computing's kubernetes monitoring helm chart for full stack monitoring. | ||||||
|  | date: 2023-09-19 | ||||||
|  | template: project.html | ||||||
|  | about: https://gitlab.com/nofusscomputing/projects/kubernetes_monitoring | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | This Helm chart deploys the components needed for full stack monitoring. | ||||||
|  |  | ||||||
|  | What this chart is: | ||||||
|  |  | ||||||
|  | - Collectors for metrics and logging | ||||||
|  |  | ||||||
|  | - Distribution / Routing of metrics/logging | ||||||
|  |  | ||||||
|  | - Data visualization | ||||||
|  |  | ||||||
|  | - Ready for logging long term storage integration, in particular Loki | ||||||
|  |  | ||||||
|  | - Ready for metrics long term storage integration | ||||||
|  |  | ||||||
|  | What this chart is not: | ||||||
|  |  | ||||||
|  | - A complete monitoring stack inclusive of long term storage. | ||||||
|  |  | ||||||
|  | Intentionally we have kept this helm chart to the monitoring components only. Keeping the monitoring components separate from storage is advantageous. For all intents and purposes this helm chart is ephemeral. | ||||||
|  |  | ||||||
|  | This helm chart started off with components from multiple open-source projects. As such attribution is warranted, so head on over to their projects and give them a star. Projects used to create the building blocks of this helm chart are: | ||||||
|  |  | ||||||
|  | - [ceph](https://github.com/ceph/ceph) _mixin alerts/rules_ | ||||||
|  |  | ||||||
|  | - [Kube prometheus](https://github.com/prometheus-operator/kube-prometheus) | ||||||
|  |  | ||||||
|  | - [prometheus adapter](https://github.com/kubernetes-sigs/prometheus-adapter) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## Features | ||||||
|  |  | ||||||
|  | - Alert Manager | ||||||
|  |  | ||||||
|  | - Grafana as the visualization frontend | ||||||
|  |  | ||||||
|  |     - Dashboards | ||||||
|  |  | ||||||
|  |         - Ceph | ||||||
|  |          | ||||||
|  |         - Cluster Overview | ||||||
|  |  | ||||||
|  |         - Node Exporter Full | ||||||
|  |  | ||||||
|  |         - Alert Manager | ||||||
|  |      | ||||||
|  |     - Sidecar to load dashboards from ConfigMaps _Optional_ | ||||||
|  |  | ||||||
|  | - Grafana-Agent  | ||||||
|  |  | ||||||
|  |     - daemonset for cluster nodes for exposing metrics and an injector to loki for node and container logs. | ||||||
|  |  | ||||||
|  | - Loki integration for storing logs | ||||||
|  |  | ||||||
|  | - [Mixins](https://github.com/monitoring-mixins/website) Compatible _(partial)_, Tested with: | ||||||
|  |  | ||||||
|  |     - alert-manager | ||||||
|  |  | ||||||
|  |     - ceph | ||||||
|  |  | ||||||
|  |     - coreDNS | ||||||
|  |  | ||||||
|  |     - Kubernetes | ||||||
|  |  | ||||||
|  |     - Loki [helm chart](https://artifacthub.io/packages/helm/grafana/loki) _included dashboards, see [loki repo](https://github.com/grafana/loki/tree/f4ab1e3e89ac66e1848764dc17826abde929fdc5/production/loki-mixin-compiled) for dashboards_ | ||||||
|  |  | ||||||
|  |     - Node-Exporter | ||||||
|  |  | ||||||
|  |     - Prometheus | ||||||
|  |  | ||||||
|  |     - Promtail | ||||||
|  |  | ||||||
|  | - Prometheus | ||||||
|  |  | ||||||
|  |     - Thanos sidecar _Optional_ | ||||||
|  |  | ||||||
|  | - Service monitors | ||||||
|  |  | ||||||
|  |     - API Server | ||||||
|  |  | ||||||
|  |     - CAdvisor | ||||||
|  |      | ||||||
|  |     - Calico, _Optional_ | ||||||
|  |  | ||||||
|  |         > enable calico metrics with `kubectl patch felixconfiguration default --type merge --patch '{"spec":{"prometheusMetricsEnabled": true}}'` | ||||||
|  |  | ||||||
|  |     - Ceph _Optional_ | ||||||
|  |  | ||||||
|  |     - CoreDNS | ||||||
|  |  | ||||||
|  |     - Grafana | ||||||
|  |  | ||||||
|  |     - Kube-Controller-Manager | ||||||
|  |  | ||||||
|  |     - Kube-Scheduler | ||||||
|  |  | ||||||
|  |     - Kubelet | ||||||
|  |  | ||||||
|  |     - Kube-state-metrics | ||||||
|  |  | ||||||
|  |     - Node | ||||||
|  |      | ||||||
|  |     - Node-Exporter | ||||||
|  |  | ||||||
|  |     - Prometheus | ||||||
|  |  | ||||||
|  |     - Prometheus-Adapter | ||||||
|  |  | ||||||
|  |     - Thanos | ||||||
|  |  | ||||||
|  | - kyverno policies _(optional, set in values.yaml)_ | ||||||
|  |  | ||||||
|  |     - auto deploy policy for prometheus role and rolebinding to access other namespaces | ||||||
|  |  | ||||||
|  | - `values.yaml` customization of components. _See values.yaml for more details._ | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## Installation | ||||||
|  |  | ||||||
|  | This chart has the following **dependencies:** | ||||||
|  |  | ||||||
|  | - [Grafana Operator](https://artifacthub.io/packages/olm/community-operators/grafana-operator) | ||||||
|  |  | ||||||
|  | - [Prometheus Operator](https://artifacthub.io/packages/olm/community-operators/prometheus) | ||||||
|  |  | ||||||
|  | These dependencies must be present before installing this chart. To install, run the following commands: | ||||||
|  |  | ||||||
|  | ``` bash | ||||||
|  |  | ||||||
|  | git clone -b development https://gitlab.com/nofusscomputing/projects/kubernetes_monitoring.git | ||||||
|  |  | ||||||
|  | helm upgrade -i nfc-monitoring kubernetes_monitoring/ | ||||||
|  |  | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | ## Metrics Storage | ||||||
|  |  | ||||||
|  | There are two different ways that this chart provides to route your metrics to long-term storage. | ||||||
|  |  | ||||||
|  | - Prometheus `remote_write` | ||||||
|  |  | ||||||
|  | - Thanos Side Car container | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ### Prometheus Remote Write | ||||||
|  |  | ||||||
|  | Using this method will have prometheus pushing its metrics to another service for storage. This option for example could be used to push metrics to Grafana Mimir. To configure, add the following to the values.yaml file at path `nfc_monitoring.prometheus.additional`: | ||||||
|  |  | ||||||
|  | ``` yaml | ||||||
|  |  | ||||||
|  | remoteWrite:  | ||||||
|  |   - name: mimir | ||||||
|  |     url: http://mimir-gateway.metrics.svc.cluster.local/api/v1/push | ||||||
|  |  | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | Ensure that you set the url to the correct url. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ### Thanos Side Car | ||||||
|  |  | ||||||
|  | This method will have a Thanos container deployed within the prometheus pod. This container will then read and upload the Prometheus container's tsdb to the configured storage. To configure, add the following to the values.yaml: | ||||||
|  |  | ||||||
|  | - `nfc_monitoring.thanos.sidecar.enabled` set to bool `true` | ||||||
|  |  | ||||||
|  | - `nfc_monitoring.thanos.sidecar.config` updated to include the sidecar config. | ||||||
|  |  | ||||||
|  |     For example to configure the Thanos sideCar to upload Prometheus metrics to S3 storage use (updating values to suit your environment): | ||||||
|  |  | ||||||
|  |     ``` yaml | ||||||
|  |     type: S3 | ||||||
|  |     config: | ||||||
|  |       bucket: "thanos-metrics" | ||||||
|  |       endpoint: "rook-ceph-rgw-earth.ceph.svc:80" | ||||||
|  |       access_key: "<your-access-key>" | ||||||
|  |       secret_key: "<your-secret-key>" | ||||||
|  |     ``` | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## Template Values | ||||||
|  |  | ||||||
|  | The values file included in this helm chart is below. | ||||||
|  |  | ||||||
|  | !!! note | ||||||
|  |     This values file is from the development branch and as such may not reflect the current version you have deployed. If you require a specific version, head on over to the git repository and select the git tag that matches the version you are after. | ||||||
|  |  | ||||||
|  | ``` yaml title="values.yaml" linenums="1" | ||||||
|  |  | ||||||
|  | --8<-- "values.yaml" | ||||||
|  |  | ||||||
|  | ``` | ||||||
							
								
								
									
										0
									
								
								docs/tags.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								docs/tags.md
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										87
									
								
								docs/task-doc-template.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								docs/task-doc-template.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,87 @@ | |||||||
|  |  | ||||||
|  |  | ||||||
|  | short summary of the task file | ||||||
|  |  | ||||||
|  | ## {Task Name} | ||||||
|  |  | ||||||
|  | - **Name**:  | ||||||
|  |  | ||||||
|  | - **Description**:  | ||||||
|  |  | ||||||
|  | - **Module**:  | ||||||
|  |  | ||||||
|  | - **Arguments**: | ||||||
|  |  | ||||||
|  |   -  | ||||||
|  |  | ||||||
|  | - **Conditional**:  | ||||||
|  |  | ||||||
|  | - **Tags**:  | ||||||
|  |  | ||||||
|  |   -  | ||||||
|  |  | ||||||
|  | ## {Task Name} | ||||||
|  |  | ||||||
|  | - **Name**:  | ||||||
|  |  | ||||||
|  | - **Description**:  | ||||||
|  |  | ||||||
|  | - **Module**:  | ||||||
|  |  | ||||||
|  | - **Arguments**: | ||||||
|  |  | ||||||
|  |   -  | ||||||
|  |  | ||||||
|  | - **Registers**:  | ||||||
|  |  | ||||||
|  | - **Conditional**:  | ||||||
|  |  | ||||||
|  | - **Tags**:  | ||||||
|  |  | ||||||
|  |   -  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## Variables | ||||||
|  |  | ||||||
|  | The following variables can be customized in this task file: | ||||||
|  |  | ||||||
|  | ```yaml | ||||||
|  | variable_name: "default_value" | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | - `variable_name`: Description of the variable. | ||||||
|  |  | ||||||
|  | ## Tags | ||||||
|  |  | ||||||
|  | The tasks in this task file are tagged with the following tags: | ||||||
|  |  | ||||||
|  | -  | ||||||
|  |  | ||||||
|  | ## Usage | ||||||
|  |  | ||||||
|  | To use this Ansible task file, you can include it in your playbook or role and provide values for the required variables. Here's an example of how you can use this task file: | ||||||
|  |  | ||||||
|  | 1. Create a playbook (e.g., `your_playbook.yaml`) and define the necessary variables: | ||||||
|  |  | ||||||
|  | ```yaml | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | - hosts: your_hosts | ||||||
|  |   vars: | ||||||
|  |     variable_name: "value" | ||||||
|  |    | ||||||
|  |   tasks: | ||||||
|  |     - include_tasks: path/to/task_file.yaml | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | 2. Create a separate file for the task file (e.g., `task_file.yaml`) and copy the content of the task file into it. | ||||||
|  |  | ||||||
|  | 3. Run the playbook: | ||||||
|  |  | ||||||
|  | ```shell | ||||||
|  | ansible-playbook your_playbook.yaml | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | Make sure to replace the placeholder values (`variable_name`, `value`) with the appropriate values for your setup. | ||||||
|  |  | ||||||
|  | Note: You may need to adjust the playbook structure and additional tasks based on your specific requirements and the tasks you want to execute. | ||||||
							
								
								
									
										1301
									
								
								files/dashboard-summary.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1301
									
								
								files/dashboard-summary.json
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										1
									
								
								gitlab-ci
									
									
									
									
									
										Submodule
									
								
							
							
								
								
								
								
								
							
						
						
									
										1
									
								
								gitlab-ci
									
									
									
									
									
										Submodule
									
								
							 Submodule gitlab-ci added at a5a9fa4437
									
								
							
							
								
								
									
										30
									
								
								mkdocs.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								mkdocs.yml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,30 @@ | |||||||
|  | INHERIT: website-template/mkdocs.yml | ||||||
|  |  | ||||||
|  | docs_dir: 'docs' | ||||||
|  |  | ||||||
|  | repo_name: Kubernetes Monitoring Helm Chart | ||||||
|  | repo_url: https://gitlab.com/nofusscomputing/projects/kubernetes_monitoring | ||||||
|  | edit_uri: '/-/ide/project/nofusscomputing/projects/kubernetes_monitoring/edit/development/-/docs/' | ||||||
|  |  | ||||||
|  | nav: | ||||||
|  | - Home: index.md | ||||||
|  |  | ||||||
|  | - Articles:  | ||||||
|  |  | ||||||
|  |   - articles/index.md | ||||||
|  |  | ||||||
|  | - Projects:  | ||||||
|  |  | ||||||
|  |   - projects/index.md | ||||||
|  |  | ||||||
|  |   - Kubernetes Monitoring: | ||||||
|  |  | ||||||
|  |     - projects/kubernetes_monitoring/index.md | ||||||
|  |  | ||||||
|  |  | ||||||
|  | - Operations:  | ||||||
|  |  | ||||||
|  |   - operations/index.md | ||||||
|  |  | ||||||
|  | - Contact Us: contact.md | ||||||
|  |  | ||||||
							
								
								
									
										21
									
								
								templates/APIService-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								templates/APIService-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,21 @@ | |||||||
|  | --- | ||||||
|  | apiVersion: apiregistration.k8s.io/v1 | ||||||
|  | kind: APIService | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: v1beta1.metrics.k8s.io | ||||||
|  | spec: | ||||||
|  |   group: metrics.k8s.io | ||||||
|  |   groupPriorityMinimum: 100 | ||||||
|  |   insecureSkipTLSVerify: true | ||||||
|  |   service: | ||||||
|  |     name: prometheus-adapter | ||||||
|  |     namespace: monitoring | ||||||
|  |   version: v1beta1 | ||||||
|  |   versionPriority: 100 | ||||||
							
								
								
									
										39
									
								
								templates/AlertManager-k8s.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								templates/AlertManager-k8s.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,39 @@ | |||||||
|  | --- | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: Alertmanager | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/component: alert-router | ||||||
|  |     app.kubernetes.io/name: alertmanager | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: main | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.alert_manager.namespace | quote }} | ||||||
|  | spec: | ||||||
|  |   image: "{{ .Values.nfc_monitoring.alert_manager.image.name }}:{{ .Values.nfc_monitoring.alert_manager.image.tag }}" | ||||||
|  |   nodeSelector: | ||||||
|  |     kubernetes.io/os: linux | ||||||
|  |   podMetadata: | ||||||
|  |     labels: | ||||||
|  |       app.kubernetes.io/component: alert-router | ||||||
|  |       app.kubernetes.io/instance: main | ||||||
|  |       app.kubernetes.io/name: alertmanager | ||||||
|  |       app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |       app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |       app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   replicas: 3 | ||||||
|  |   resources: | ||||||
|  |     limits: | ||||||
|  |       cpu: 100m | ||||||
|  |       memory: 100Mi | ||||||
|  |     requests: | ||||||
|  |       cpu: 4m | ||||||
|  |       memory: 100Mi | ||||||
|  |   securityContext: | ||||||
|  |     fsGroup: 2000 | ||||||
|  |     runAsNonRoot: true | ||||||
|  |     runAsUser: 1000 | ||||||
|  |   serviceAccountName: alertmanager-main | ||||||
|  |   version: 0.25.0 | ||||||
							
								
								
									
										75
									
								
								templates/ClusterPolicy-Prometheus-Role.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								templates/ClusterPolicy-Prometheus-Role.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,75 @@ | |||||||
|  | {{ if .Values.nfc_monitoring.prometheus.kyverno_role_policy }} | ||||||
|  | --- | ||||||
|  | apiVersion: kyverno.io/v1 | ||||||
|  | kind: ClusterPolicy | ||||||
|  | metadata: | ||||||
|  |   name: add-prometheus-role | ||||||
|  |   annotations: | ||||||
|  |     policies.kyverno.io/title: Add Prometheus Role | ||||||
|  |     policies.kyverno.io/category: Monitoring | ||||||
|  |     policies.kyverno.io/subject: RoleBinding | ||||||
|  |     policies.kyverno.io/minversion: 1.6.0 | ||||||
|  |     policies.kyverno.io/description: >- | ||||||
|  |       This policy is responsible for ensuring that a Role for the prometheus | ||||||
|  |       monitoring instances is created to enable monitoring of  the namespace in | ||||||
|  |       question. | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | spec: | ||||||
|  |   background: true | ||||||
|  |   generateExisting: true | ||||||
|  |   rules: | ||||||
|  |     - name: generate-prometheus-role | ||||||
|  |       match: | ||||||
|  |         any: | ||||||
|  |         - resources: | ||||||
|  |             kinds: | ||||||
|  |               - Namespace | ||||||
|  |       generate: | ||||||
|  |         synchronize: true | ||||||
|  |         apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  |         kind: Role | ||||||
|  |         name: prometheus-k8s | ||||||
|  |         namespace: "{{ `{{` }}request.object.metadata.name }}" | ||||||
|  |         data: | ||||||
|  |           metadata: | ||||||
|  |             labels: | ||||||
|  |               app.kubernetes.io/component: prometheus | ||||||
|  |               app.kubernetes.io/instance: k8s | ||||||
|  |               app.kubernetes.io/name: prometheus | ||||||
|  |               app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |               app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |              | ||||||
|  |           rules: | ||||||
|  |           - apiGroups: | ||||||
|  |             - "" | ||||||
|  |             resources: | ||||||
|  |             - services | ||||||
|  |             - endpoints | ||||||
|  |             - pods | ||||||
|  |             verbs: | ||||||
|  |             - get | ||||||
|  |             - list | ||||||
|  |             - watch | ||||||
|  |           - apiGroups: | ||||||
|  |             - extensions | ||||||
|  |             resources: | ||||||
|  |             - ingresses | ||||||
|  |             verbs: | ||||||
|  |             - get | ||||||
|  |             - list | ||||||
|  |             - watch | ||||||
|  |           - apiGroups: | ||||||
|  |             - networking.k8s.io | ||||||
|  |             resources: | ||||||
|  |             - ingresses | ||||||
|  |             verbs: | ||||||
|  |             - get | ||||||
|  |             - list | ||||||
|  |             - watch | ||||||
|  |  | ||||||
|  | {{ end }} | ||||||
							
								
								
									
										53
									
								
								templates/ClusterPolicy-Prometheus-RoleBinding.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								templates/ClusterPolicy-Prometheus-RoleBinding.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,53 @@ | |||||||
|  | {{ if .Values.nfc_monitoring.prometheus.kyverno_role_policy }} | ||||||
|  | --- | ||||||
|  | # Kyverno ClusterPolicy, rendered only when | ||||||
|  | # nfc_monitoring.prometheus.kyverno_role_policy is truthy. For every matched | ||||||
|  | # Namespace it generates a RoleBinding named prometheus-k8s that binds the | ||||||
|  | # Role prometheus-k8s to the prometheus-k8s ServiceAccount living in the | ||||||
|  | # configured Prometheus namespace. | ||||||
|  | apiVersion: kyverno.io/v1 | ||||||
|  | kind: ClusterPolicy | ||||||
|  | metadata: | ||||||
|  |   name: add-prometheus-role-binding | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   annotations: | ||||||
|  |     policies.kyverno.io/category: Monitoring | ||||||
|  |     policies.kyverno.io/description: >- | ||||||
|  |       This policy is responsible for ensuring that a RoleBinding for the prometheus | ||||||
|  |       monitoring instances is created to enable monitoring of  the namespace in | ||||||
|  |       question. | ||||||
|  |     policies.kyverno.io/minversion: 1.6.0 | ||||||
|  |     policies.kyverno.io/subject: RoleBinding | ||||||
|  |     policies.kyverno.io/title: Add Prometheus RoleBinding | ||||||
|  | spec: | ||||||
|  |   background: true | ||||||
|  |   generateExisting: true | ||||||
|  |   rules: | ||||||
|  |     - name: generate-prometheus-binding | ||||||
|  |       match: | ||||||
|  |         any: | ||||||
|  |           - resources: | ||||||
|  |               kinds: [Namespace] | ||||||
|  |       generate: | ||||||
|  |         apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  |         kind: RoleBinding | ||||||
|  |         name: prometheus-k8s | ||||||
|  |         # The backtick-quoted Helm action below emits a literal pair of opening | ||||||
|  |         # braces, so the rendered manifest carries a Kyverno variable (the | ||||||
|  |         # name of the matched Namespace) instead of a Helm expression. | ||||||
|  |         namespace: "{{ `{{` }}request.object.metadata.name }}" | ||||||
|  |         synchronize: true | ||||||
|  |         data: | ||||||
|  |           metadata: | ||||||
|  |             labels: | ||||||
|  |               app.kubernetes.io/component: prometheus | ||||||
|  |               app.kubernetes.io/name: prometheus | ||||||
|  |               app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |               app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |           roleRef: | ||||||
|  |             apiGroup: rbac.authorization.k8s.io | ||||||
|  |             kind: Role | ||||||
|  |             name: prometheus-k8s | ||||||
|  |           subjects: | ||||||
|  |             - kind: ServiceAccount | ||||||
|  |               name: prometheus-k8s | ||||||
|  |               namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}" | ||||||
|  | {{ end }} | ||||||
							
								
								
									
										42
									
								
								templates/ClusterRole-GrafanaAgent.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								templates/ClusterRole-GrafanaAgent.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,42 @@ | |||||||
|  | --- | ||||||
|  | # ClusterRole "grafana-agent": read access to core-group objects and the | ||||||
|  | # /metrics endpoint, plus permission to create token and access reviews. | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | kind: ClusterRole | ||||||
|  | metadata: | ||||||
|  |   name: grafana-agent | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: grafana-agent | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | rules: | ||||||
|  |   # Read-only access to objects in the core ("") API group. | ||||||
|  |   - apiGroups: [""] | ||||||
|  |     resources: | ||||||
|  |       - nodes | ||||||
|  |       - nodes/proxy | ||||||
|  |       - services | ||||||
|  |       - endpoints | ||||||
|  |       - pods | ||||||
|  |       - events | ||||||
|  |     verbs: [get, list, watch] | ||||||
|  |   # GET on the /metrics non-resource URL. | ||||||
|  |   - nonResourceURLs: [/metrics] | ||||||
|  |     verbs: [get] | ||||||
|  |   - apiGroups: [authentication.k8s.io] | ||||||
|  |     resources: [tokenreviews] | ||||||
|  |     verbs: [create] | ||||||
|  |   - apiGroups: [authorization.k8s.io] | ||||||
|  |     resources: [subjectaccessreviews] | ||||||
|  |     verbs: [create] | ||||||
							
								
								
									
										26
									
								
								templates/ClusterRole-aggregated-metrics-reader.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								templates/ClusterRole-aggregated-metrics-reader.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,26 @@ | |||||||
|  | --- | ||||||
|  | # ClusterRole granting read access (get/list/watch) to pods and nodes in the | ||||||
|  | # metrics.k8s.io API group. The aggregate-to-* labels mark it for aggregation | ||||||
|  | # into the admin, edit and view ClusterRoles. | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | kind: ClusterRole | ||||||
|  | metadata: | ||||||
|  |   # ClusterRole is cluster-scoped, so no metadata.namespace is set here. | ||||||
|  |   name: system:aggregated-metrics-reader | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     rbac.authorization.k8s.io/aggregate-to-admin: "true" | ||||||
|  |     rbac.authorization.k8s.io/aggregate-to-edit: "true" | ||||||
|  |     rbac.authorization.k8s.io/aggregate-to-view: "true" | ||||||
|  | rules: | ||||||
|  | - apiGroups: | ||||||
|  |   - metrics.k8s.io | ||||||
|  |   resources: | ||||||
|  |   - pods | ||||||
|  |   - nodes | ||||||
|  |   verbs: | ||||||
|  |   - get | ||||||
|  |   - list | ||||||
|  |   - watch | ||||||
| @ -0,0 +1,20 @@ | |||||||
|  | --- | ||||||
|  | # Binds the system:auth-delegator ClusterRole to the prometheus-adapter | ||||||
|  | # ServiceAccount. | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | kind: ClusterRoleBinding | ||||||
|  | metadata: | ||||||
|  |   # ClusterRoleBinding is cluster-scoped, so no metadata.namespace is set here. | ||||||
|  |   name: resource-metrics:system:auth-delegator | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | roleRef: | ||||||
|  |   apiGroup: rbac.authorization.k8s.io | ||||||
|  |   kind: ClusterRole | ||||||
|  |   name: system:auth-delegator | ||||||
|  | subjects: | ||||||
|  | - kind: ServiceAccount | ||||||
|  |   name: prometheus-adapter | ||||||
|  |   # NOTE(review): subject namespace is hard-coded; confirm the adapter's | ||||||
|  |   # ServiceAccount is always deployed to "monitoring". | ||||||
|  |   namespace: monitoring | ||||||
| @ -0,0 +1,19 @@ | |||||||
|  | --- | ||||||
|  | # Binds the custom-metrics-server-resources ClusterRole to the | ||||||
|  | # horizontal-pod-autoscaler ServiceAccount in kube-system. | ||||||
|  | # NOTE(review): a ClusterRole named custom-metrics-server-resources is not | ||||||
|  | # defined next to this template; confirm it is created elsewhere. | ||||||
|  | kind: ClusterRoleBinding | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: hpa-controller-custom-metrics | ||||||
|  | subjects: | ||||||
|  |   - kind: ServiceAccount | ||||||
|  |     namespace: kube-system | ||||||
|  |     name: horizontal-pod-autoscaler | ||||||
|  | roleRef: | ||||||
|  |   kind: ClusterRole | ||||||
|  |   name: custom-metrics-server-resources | ||||||
|  |   apiGroup: rbac.authorization.k8s.io | ||||||
							
								
								
									
										20
									
								
								templates/ClusterRole-binding-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								templates/ClusterRole-binding-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,20 @@ | |||||||
|  | --- | ||||||
|  | # Binds the prometheus-adapter ClusterRole to the prometheus-adapter | ||||||
|  | # ServiceAccount. | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | kind: ClusterRoleBinding | ||||||
|  | metadata: | ||||||
|  |   # ClusterRoleBinding is cluster-scoped, so no metadata.namespace is set here. | ||||||
|  |   name: prometheus-adapter | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | roleRef: | ||||||
|  |   apiGroup: rbac.authorization.k8s.io | ||||||
|  |   kind: ClusterRole | ||||||
|  |   name: prometheus-adapter | ||||||
|  | subjects: | ||||||
|  | - kind: ServiceAccount | ||||||
|  |   name: prometheus-adapter | ||||||
|  |   # NOTE(review): subject namespace is hard-coded; confirm the adapter's | ||||||
|  |   # ServiceAccount is always deployed to "monitoring". | ||||||
|  |   namespace: monitoring | ||||||
							
								
								
									
										18
									
								
								templates/ClusterRole-grafana-SideCar.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								templates/ClusterRole-grafana-SideCar.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | |||||||
|  | {{ if .Values.nfc_monitoring.additions.dashboard_sidecar.enabled -}} | ||||||
|  | --- | ||||||
|  | # ClusterRole giving read access (get/watch/list) to ConfigMaps in every | ||||||
|  | # namespace. Rendered only when the dashboard sidecar addition is enabled. | ||||||
|  | kind: ClusterRole | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | metadata: | ||||||
|  |   name: Grafana-Sidecar | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: graphing | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: grafana | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | rules: | ||||||
|  |   - apiGroups: | ||||||
|  |       - "" | ||||||
|  |     resources: | ||||||
|  |       - configmaps | ||||||
|  |     verbs: | ||||||
|  |       - get | ||||||
|  |       - watch | ||||||
|  |       - list | ||||||
|  | {{- end }} | ||||||
							
								
								
									
										21
									
								
								templates/ClusterRole-kube-monitor-proxy.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								templates/ClusterRole-kube-monitor-proxy.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,21 @@ | |||||||
|  | --- | ||||||
|  | # ClusterRole "kube-monitor-proxy": may create TokenReviews and | ||||||
|  | # SubjectAccessReviews, nothing else. | ||||||
|  | kind: ClusterRole | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: proxy | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: kube-monitor-proxy | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: kube-monitor-proxy | ||||||
|  | rules: | ||||||
|  |   - apiGroups: | ||||||
|  |       - authentication.k8s.io | ||||||
|  |     resources: | ||||||
|  |       - tokenreviews | ||||||
|  |     verbs: | ||||||
|  |       - create | ||||||
|  |   - apiGroups: | ||||||
|  |       - authorization.k8s.io | ||||||
|  |     resources: | ||||||
|  |       - subjectaccessreviews | ||||||
|  |     verbs: | ||||||
|  |       - create | ||||||
							
								
								
									
										20
									
								
								templates/ClusterRole-kubeStateMetrics-1.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								templates/ClusterRole-kubeStateMetrics-1.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,20 @@ | |||||||
|  | --- | ||||||
|  | # Binds the kube-state-metrics ClusterRole to the kube-state-metrics | ||||||
|  | # ServiceAccount in the "monitoring" namespace. | ||||||
|  | kind: ClusterRoleBinding | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | metadata: | ||||||
|  |   name: kube-state-metrics | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: kube-state-metrics | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | subjects: | ||||||
|  |   - kind: ServiceAccount | ||||||
|  |     namespace: monitoring | ||||||
|  |     name: kube-state-metrics | ||||||
|  | roleRef: | ||||||
|  |   kind: ClusterRole | ||||||
|  |   name: kube-state-metrics | ||||||
|  |   apiGroup: rbac.authorization.k8s.io | ||||||
							
								
								
									
										132
									
								
								templates/ClusterRole-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										132
									
								
								templates/ClusterRole-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,132 @@ | |||||||
|  | --- | ||||||
|  | # ClusterRole "kube-state-metrics": list/watch on the object kinds enumerated | ||||||
|  | # below, plus create on token and subject access reviews. Rules are kept one | ||||||
|  | # per API group so no group gains resources that belong to another. | ||||||
|  | kind: ClusterRole | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | metadata: | ||||||
|  |   name: kube-state-metrics | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: kube-state-metrics | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | rules: | ||||||
|  |   # Core ("") API group. | ||||||
|  |   - apiGroups: [""] | ||||||
|  |     resources: | ||||||
|  |       - configmaps | ||||||
|  |       - secrets | ||||||
|  |       - nodes | ||||||
|  |       - pods | ||||||
|  |       - services | ||||||
|  |       - serviceaccounts | ||||||
|  |       - resourcequotas | ||||||
|  |       - replicationcontrollers | ||||||
|  |       - limitranges | ||||||
|  |       - persistentvolumeclaims | ||||||
|  |       - persistentvolumes | ||||||
|  |       - namespaces | ||||||
|  |       - endpoints | ||||||
|  |     verbs: [list, watch] | ||||||
|  |   - apiGroups: [apps] | ||||||
|  |     resources: | ||||||
|  |       - statefulsets | ||||||
|  |       - daemonsets | ||||||
|  |       - deployments | ||||||
|  |       - replicasets | ||||||
|  |     verbs: [list, watch] | ||||||
|  |   - apiGroups: [batch] | ||||||
|  |     resources: [cronjobs, jobs] | ||||||
|  |     verbs: [list, watch] | ||||||
|  |   - apiGroups: [autoscaling] | ||||||
|  |     resources: [horizontalpodautoscalers] | ||||||
|  |     verbs: [list, watch] | ||||||
|  |   # Review APIs: create only. | ||||||
|  |   - apiGroups: [authentication.k8s.io] | ||||||
|  |     resources: [tokenreviews] | ||||||
|  |     verbs: [create] | ||||||
|  |   - apiGroups: [authorization.k8s.io] | ||||||
|  |     resources: [subjectaccessreviews] | ||||||
|  |     verbs: [create] | ||||||
|  |   - apiGroups: [policy] | ||||||
|  |     resources: [poddisruptionbudgets] | ||||||
|  |     verbs: [list, watch] | ||||||
|  |   - apiGroups: [certificates.k8s.io] | ||||||
|  |     resources: [certificatesigningrequests] | ||||||
|  |     verbs: [list, watch] | ||||||
|  |   - apiGroups: [discovery.k8s.io] | ||||||
|  |     resources: [endpointslices] | ||||||
|  |     verbs: [list, watch] | ||||||
|  |   - apiGroups: [storage.k8s.io] | ||||||
|  |     resources: [storageclasses, volumeattachments] | ||||||
|  |     verbs: [list, watch] | ||||||
|  |   - apiGroups: [admissionregistration.k8s.io] | ||||||
|  |     resources: | ||||||
|  |       - mutatingwebhookconfigurations | ||||||
|  |       - validatingwebhookconfigurations | ||||||
|  |     verbs: [list, watch] | ||||||
|  |   - apiGroups: [networking.k8s.io] | ||||||
|  |     resources: | ||||||
|  |       - networkpolicies | ||||||
|  |       - ingressclasses | ||||||
|  |       - ingresses | ||||||
|  |     verbs: [list, watch] | ||||||
|  |   - apiGroups: [coordination.k8s.io] | ||||||
|  |     resources: [leases] | ||||||
|  |     verbs: [list, watch] | ||||||
|  |   - apiGroups: [rbac.authorization.k8s.io] | ||||||
|  |     resources: | ||||||
|  |       - clusterrolebindings | ||||||
|  |       - clusterroles | ||||||
|  |       - rolebindings | ||||||
|  |       - roles | ||||||
|  |     verbs: [list, watch] | ||||||
| @ -0,0 +1,18 @@ | |||||||
|  | --- | ||||||
|  | # ClusterRole allowing every verb on every resource of the metrics.k8s.io | ||||||
|  | # API group. | ||||||
|  | kind: ClusterRole | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | metadata: | ||||||
|  |   name: resource-metrics-server-resources | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | rules: | ||||||
|  |   - apiGroups: [metrics.k8s.io] | ||||||
|  |     resources: ['*'] | ||||||
|  |     verbs: ['*'] | ||||||
							
								
								
									
										24
									
								
								templates/ClusterRole-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								templates/ClusterRole-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,24 @@ | |||||||
|  | --- | ||||||
|  | # ClusterRole "prometheus-adapter": read access (get/list/watch) to nodes, | ||||||
|  | # namespaces, pods and services in the core API group. | ||||||
|  | kind: ClusterRole | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | metadata: | ||||||
|  |   name: prometheus-adapter | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | rules: | ||||||
|  |   - apiGroups: [""] | ||||||
|  |     resources: [nodes, namespaces, pods, services] | ||||||
|  |     verbs: [get, list, watch] | ||||||
							
								
								
									
										23
									
								
								templates/ClusterRole-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								templates/ClusterRole-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,23 @@ | |||||||
|  | --- | ||||||
|  | # ClusterRole "prometheus-k8s": GET on the nodes/metrics subresource and on | ||||||
|  | # the /metrics non-resource URL. | ||||||
|  | kind: ClusterRole | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | metadata: | ||||||
|  |   name: prometheus-k8s | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | rules: | ||||||
|  |   - apiGroups: [""] | ||||||
|  |     resources: [nodes/metrics] | ||||||
|  |     verbs: [get] | ||||||
|  |   - nonResourceURLs: [/metrics] | ||||||
|  |     verbs: [get] | ||||||
							
								
								
									
										20
									
								
								templates/ClusterRoleBinding-Grafana-Agent.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								templates/ClusterRoleBinding-Grafana-Agent.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,20 @@ | |||||||
|  | --- | ||||||
|  | # Binds the grafana-agent ClusterRole to the grafana-agent ServiceAccount in | ||||||
|  | # the "monitoring" namespace. | ||||||
|  | kind: ClusterRoleBinding | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | metadata: | ||||||
|  |   name: grafana-agent | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: grafana-agent | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | subjects: | ||||||
|  |   - kind: ServiceAccount | ||||||
|  |     namespace: monitoring | ||||||
|  |     name: grafana-agent | ||||||
|  | roleRef: | ||||||
|  |   kind: ClusterRole | ||||||
|  |   name: grafana-agent | ||||||
|  |   apiGroup: rbac.authorization.k8s.io | ||||||
							
								
								
									
										22
									
								
								templates/ClusterRoleBinding-Grafana-SideCar.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								templates/ClusterRoleBinding-Grafana-SideCar.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,22 @@ | |||||||
|  | {{ if .Values.nfc_monitoring.additions.dashboard_sidecar.enabled -}} | ||||||
|  | --- | ||||||
|  | # Binds the Grafana-Sidecar ClusterRole to the grafana ServiceAccount in the | ||||||
|  | # configured Grafana namespace. Rendered only when the dashboard sidecar | ||||||
|  | # addition is enabled. | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | kind: ClusterRoleBinding | ||||||
|  | metadata: | ||||||
|  |   name: Grafana-Sidecar | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: graphing | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: grafana | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | subjects: | ||||||
|  | - kind: ServiceAccount | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}" | ||||||
|  |   name: grafana | ||||||
|  | roleRef: | ||||||
|  |   apiGroup: rbac.authorization.k8s.io | ||||||
|  |   kind: ClusterRole | ||||||
|  |   name: Grafana-Sidecar | ||||||
|  | {{- end }} | ||||||
							
								
								
									
										20
									
								
								templates/ClusterRoleBinding-kube-monitor-proxy.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								templates/ClusterRoleBinding-kube-monitor-proxy.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,20 @@ | |||||||
|  | --- | ||||||
|  | # Binds the kube-monitor-proxy ClusterRole to the kube-monitor-proxy | ||||||
|  | # ServiceAccount in the "monitoring" namespace. | ||||||
|  | kind: ClusterRoleBinding | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: proxy | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: kube-monitor-proxy | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: kube-monitor-proxy | ||||||
|  | subjects: | ||||||
|  |   - kind: ServiceAccount | ||||||
|  |     namespace: monitoring | ||||||
|  |     name: kube-monitor-proxy | ||||||
|  | roleRef: | ||||||
|  |   kind: ClusterRole | ||||||
|  |   name: kube-monitor-proxy | ||||||
|  |   apiGroup: rbac.authorization.k8s.io | ||||||
							
								
								
									
										20
									
								
								templates/ClusterRoleBinding-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								templates/ClusterRoleBinding-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,20 @@ | |||||||
|  | --- | ||||||
|  | # Binds the prometheus-k8s ClusterRole to the prometheus-k8s ServiceAccount | ||||||
|  | # in the configured Prometheus namespace. | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | kind: ClusterRoleBinding | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: prometheus-k8s | ||||||
|  | roleRef: | ||||||
|  |   apiGroup: rbac.authorization.k8s.io | ||||||
|  |   kind: ClusterRole | ||||||
|  |   name: prometheus-k8s | ||||||
|  | subjects: | ||||||
|  | - kind: ServiceAccount | ||||||
|  |   name: prometheus-k8s | ||||||
|  |   # Quoted so the rendered value is always a YAML string, even when the | ||||||
|  |   # configured namespace is empty or looks like a number or boolean. | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}" | ||||||
							
								
								
									
										130
									
								
								templates/ConfigMap-Grafana.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										130
									
								
								templates/ConfigMap-Grafana.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,130 @@ | |||||||
|  | --- | ||||||
|  | # ConfigMap "grafana-config" in the configured Grafana namespace. It carries | ||||||
|  | # two provisioning payloads: a Prometheus datasource (JSON) and the Grafana | ||||||
|  | # OnCall app settings (YAML). | ||||||
|  | kind: ConfigMap | ||||||
|  | apiVersion: v1 | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: graphing | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/name: grafana | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: grafana-config | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}" | ||||||
|  | data: | ||||||
|  |   # NOTE(review): the datasource URL hard-codes the "monitoring" namespace; | ||||||
|  |   # confirm it matches the namespace Prometheus is actually deployed to. | ||||||
|  |   prometheus.yaml: |- | ||||||
|  |     { | ||||||
|  |         "apiVersion": 1, | ||||||
|  |         "datasources": [ | ||||||
|  |             { | ||||||
|  |                "access":"proxy", | ||||||
|  |                 "editable": true, | ||||||
|  |                 "name": "prometheus", | ||||||
|  |                 "orgId": 1, | ||||||
|  |                 "type": "prometheus", | ||||||
|  |                 "url": "http://prometheus-config-map.monitoring.svc:9090", | ||||||
|  |                 "version": 1 | ||||||
|  |             } | ||||||
|  |         ] | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |   # NOTE(review): onCallInvitationToken below is a credential committed in | ||||||
|  |   # plain text inside a ConfigMap; consider sourcing it from a Secret. The | ||||||
|  |   # service hosts also hard-code the "monitoring" namespace. | ||||||
|  |   oncall-app.yaml: |- | ||||||
|  |     apiVersion: 1 | ||||||
|  |     apps: | ||||||
|  |     - type: grafana-oncall-app | ||||||
|  |       name: grafana-oncall-app | ||||||
|  |       disabled: false | ||||||
|  |       jsonData: | ||||||
|  |         #user: admin | ||||||
|  |         grafanaUrl: http://grafana.monitoring.svc:3000 | ||||||
|  |         onCallApiUrl: http://oncall.monitoring.svc:8080 | ||||||
|  |       secureJsonData: | ||||||
|  |         #password: admin | ||||||
|  |         onCallInvitationToken: yaD3a6dkYV0CFinTlMPb42Mi9 | ||||||
|  |         #http://127.0.0.1:8080/integrations/v1/alertmanager/yaD3a6dkYV0CFinTlMPb42Mi9/ | ||||||
|  |  | ||||||
|  |  | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | # ConfigMap "grafana-alertmanager": Grafana-managed Alertmanager | ||||||
|  | # configuration (JSON) with a single webhook receiver and one catch-all | ||||||
|  | # severity route. | ||||||
|  | # NOTE(review): the webhook URL targets host.docker.internal and embeds an | ||||||
|  | # integration token; confirm this is intended outside local development. | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: ConfigMap | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: graphing | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: grafana | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: grafana-alertmanager | ||||||
|  |   # Follows the configured Grafana namespace, like the grafana-config | ||||||
|  |   # ConfigMap, so both land next to the Grafana deployment. | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}" | ||||||
|  | data: | ||||||
|  |   alertmanager.yaml: |- | ||||||
|  |     { | ||||||
|  |       "template_files": {}, | ||||||
|  |       "template_file_provenances": {}, | ||||||
|  |       "alertmanager_config": { | ||||||
|  |         "route": { | ||||||
|  |           "receiver": "Grafana Alerting 😊", | ||||||
|  |           "group_by": [ | ||||||
|  |             "grafana_folder", | ||||||
|  |             "alertname" | ||||||
|  |           ], | ||||||
|  |           "routes": [ | ||||||
|  |             { | ||||||
|  |               "receiver": "Grafana Alerting 😊", | ||||||
|  |               "object_matchers": [ | ||||||
|  |                 [ | ||||||
|  |                   "severity", | ||||||
|  |                   "!=", | ||||||
|  |                   "" | ||||||
|  |                 ], | ||||||
|  |                 [ | ||||||
|  |                   "severity", | ||||||
|  |                   "=", | ||||||
|  |                   "critical" | ||||||
|  |                 ], | ||||||
|  |                 [ | ||||||
|  |                   "severity", | ||||||
|  |                   "=", | ||||||
|  |                   "info" | ||||||
|  |                 ], | ||||||
|  |                 [ | ||||||
|  |                   "severity", | ||||||
|  |                   "=", | ||||||
|  |                   "warning" | ||||||
|  |                 ] | ||||||
|  |               ], | ||||||
|  |               "continue": true, | ||||||
|  |               "group_wait": "1s", | ||||||
|  |               "group_interval": "2s", | ||||||
|  |               "repeat_interval": "15m" | ||||||
|  |             } | ||||||
|  |           ], | ||||||
|  |           "group_wait": "1s", | ||||||
|  |           "group_interval": "2s", | ||||||
|  |           "repeat_interval": "3m" | ||||||
|  |         }, | ||||||
|  |         "templates": null, | ||||||
|  |         "receivers": [ | ||||||
|  |           { | ||||||
|  |             "name": "Grafana Alerting 😊", | ||||||
|  |             "grafana_managed_receiver_configs": [ | ||||||
|  |               { | ||||||
|  |                 "uid": "4dqvJSfVkz", | ||||||
|  |                 "name": "Grafana Alerting 😊", | ||||||
|  |                 "type": "webhook", | ||||||
|  |                 "disableResolveMessage": false, | ||||||
|  |                 "settings": { | ||||||
|  |                   "httpMethod": "POST", | ||||||
|  |                   "url": "http://host.docker.internal:8080/integrations/v1/grafana_alerting/dXStYoK9Z15VZW8R8AtfyIwtu/" | ||||||
|  |                 }, | ||||||
|  |                 "secureFields": {} | ||||||
|  |               } | ||||||
|  |             ] | ||||||
|  |           } | ||||||
|  |         ] | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | --- | ||||||
							
								
								
									
										284
									
								
								templates/ConfigMap-GrafanaAgent.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										284
									
								
								templates/ConfigMap-GrafanaAgent.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,284 @@ | |||||||
|  | --- | ||||||
|  | # ConfigMap carrying two configuration files: | ||||||
|  | #   agent.yaml          - Grafana Agent config (log scraping + node_exporter). | ||||||
|  | #   syslog_server.yaml  - syslog receiver config that pushes to the same Loki URL. | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: ConfigMap | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/name: grafana-agent | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: grafana-agent | ||||||
|  |   # Same value the grafana-agent DaemonSet uses for its namespace; a ConfigMap | ||||||
|  |   # volume can only be mounted from the pod's own namespace. | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.grafana_agent.namespace }}" | ||||||
|  | data: | ||||||
|  |   agent.yaml: | | ||||||
|  |     metrics: | ||||||
|  |       wal_directory: /tmp/wal | ||||||
|  |  | ||||||
|  |     logs: | ||||||
|  |       positions_directory: "/tmp" | ||||||
|  |  | ||||||
|  |       configs: | ||||||
|  |  | ||||||
|  |       - name: node-logs | ||||||
|  |         # Loki push endpoint, assembled from chart values. | ||||||
|  |         clients: | ||||||
|  |           - url: http://{{ .Values.nfc_monitoring.loki.service_name }}.{{ .Values.nfc_monitoring.loki.namespace }}.svc.{{ .Values.nfc_monitoring.kubernetes.cluster_dns_name }}:{{ .Values.nfc_monitoring.loki.service_port }}/loki/api/v1/push | ||||||
|  |             backoff_config: | ||||||
|  |               min_period: 10s | ||||||
|  |               max_period: 5m | ||||||
|  |               max_retries: 10 | ||||||
|  |  | ||||||
|  |         limits_config: | ||||||
|  |           readline_rate_enabled: true | ||||||
|  |           readline_rate: 250 | ||||||
|  |           readline_burst: 750 | ||||||
|  |           readline_rate_drop: false | ||||||
|  |           max_streams: 500 | ||||||
|  |  | ||||||
|  |         scrape_configs: | ||||||
|  |  | ||||||
|  |           # systemd journal, read from the host root mounted at /host/root. | ||||||
|  |           - job_name: systemd-journal | ||||||
|  |             journal: | ||||||
|  |               path: /host/root/run/log/journal | ||||||
|  |               json: true | ||||||
|  |               max_age: 24h | ||||||
|  |             relabel_configs: | ||||||
|  |               - source_labels: | ||||||
|  |                   - __journal__systemd_unit | ||||||
|  |                 target_label: systemd_unit | ||||||
|  |               - source_labels: | ||||||
|  |                   - __journal__hostname | ||||||
|  |                 target_label: node | ||||||
|  |               - source_labels: | ||||||
|  |                   - __journal_syslog_identifier | ||||||
|  |                 target_label: syslog_identifier | ||||||
|  |               - target_label: "job" | ||||||
|  |                 replacement: "systemd-journal" | ||||||
|  |               - target_label: "job_name" | ||||||
|  |                 replacement: "journal-logs" | ||||||
|  |               - target_label: hostname | ||||||
|  |                 replacement: "${HOSTNAME}" | ||||||
|  |  | ||||||
|  |             pipeline_stages: | ||||||
|  |               # Pull selected fields out of the JSON-encoded journal entry. | ||||||
|  |               - json: | ||||||
|  |                   expressions: | ||||||
|  |                     pid: _PID | ||||||
|  |                     userId: _UID | ||||||
|  |                     application: _COMM | ||||||
|  |                     priority: PRIORITY | ||||||
|  |  | ||||||
|  |               - labels: | ||||||
|  |                   application: | ||||||
|  |                   #level: | ||||||
|  |                   pid: | ||||||
|  |                   userId: | ||||||
|  |                   priority: | ||||||
|  |               # Lower-case whatever `level` currently holds. The doubled braces | ||||||
|  |               # make Helm emit a literal template action for the agent. | ||||||
|  |               - template: | ||||||
|  |                   source: level | ||||||
|  |                   template: '{{"{{"}} ToLower .Value {{"}}"}}' | ||||||
|  |               # Translate the numeric priority label (7..0) into a textual level. | ||||||
|  |               - match: | ||||||
|  |                   selector: '{priority="7"}' | ||||||
|  |                   stages: | ||||||
|  |                   - template: | ||||||
|  |                       source: level | ||||||
|  |                       template: 'debug' | ||||||
|  |               - match: | ||||||
|  |                   selector: '{priority="6"}' | ||||||
|  |                   stages: | ||||||
|  |                   - template: | ||||||
|  |                       source: level | ||||||
|  |                       template: 'info' | ||||||
|  |               - match: | ||||||
|  |                   selector: '{priority="5"}' | ||||||
|  |                   stages: | ||||||
|  |                   - template: | ||||||
|  |                       source: level | ||||||
|  |                       template: 'notice' | ||||||
|  |               - match: | ||||||
|  |                   selector: '{priority="4"}' | ||||||
|  |                   stages: | ||||||
|  |                   - template: | ||||||
|  |                       source: level | ||||||
|  |                       template: 'warning' | ||||||
|  |               - match: | ||||||
|  |                   selector: '{priority="3"}' | ||||||
|  |                   stages: | ||||||
|  |                   - template: | ||||||
|  |                       source: level | ||||||
|  |                       template: 'error' | ||||||
|  |               - match: | ||||||
|  |                   selector: '{priority="2"}' | ||||||
|  |                   stages: | ||||||
|  |                   - template: | ||||||
|  |                       source: level | ||||||
|  |                       template: 'crit' | ||||||
|  |               - match: | ||||||
|  |                   selector: '{priority="1"}' | ||||||
|  |                   stages: | ||||||
|  |                   - template: | ||||||
|  |                       source: level | ||||||
|  |                       template: 'alert' | ||||||
|  |               - match: | ||||||
|  |                   selector: '{priority="0"}' | ||||||
|  |                   stages: | ||||||
|  |                   - template: | ||||||
|  |                       source: level | ||||||
|  |                       template: 'emerg' | ||||||
|  |               # Publish the mapped level as a label. | ||||||
|  |               - labels: | ||||||
|  |                   level: | ||||||
|  |  | ||||||
|  |  | ||||||
|  |           # Plain log files under /var/log; one static target per file, | ||||||
|  |           # distinguished by the `job_name` label. | ||||||
|  |           - job_name: var-logs | ||||||
|  |             static_configs: | ||||||
|  |               - targets: [localhost] | ||||||
|  |                 labels: | ||||||
|  |                   job_name: 'aptitude' | ||||||
|  |                   __path__: /var/log/apt/*.log | ||||||
|  |               - targets: [localhost] | ||||||
|  |                 labels: | ||||||
|  |                   job_name: 'dpkg' | ||||||
|  |                   __path__: /var/log/dpkg.log | ||||||
|  |               - targets: [localhost] | ||||||
|  |                 labels: | ||||||
|  |                   job_name: 'messages' | ||||||
|  |                   __path__: /var/log/messages | ||||||
|  |               - targets: [localhost] | ||||||
|  |                 labels: | ||||||
|  |                   job_name: 'syslog' | ||||||
|  |                   __path__: /var/log/syslog | ||||||
|  |               - targets: [localhost] | ||||||
|  |                 labels: | ||||||
|  |                   job_name: 'kernlog' | ||||||
|  |                   __path__: /var/log/kern.log | ||||||
|  |               - targets: [localhost] | ||||||
|  |                 labels: | ||||||
|  |                   job_name: 'auth-log' | ||||||
|  |                   __path__: /var/log/auth.log | ||||||
|  |               - targets: [localhost] | ||||||
|  |                 labels: | ||||||
|  |                   job_name: 'boot-log' | ||||||
|  |                   __path__: /var/log/boot.log | ||||||
|  |               - targets: [localhost] | ||||||
|  |                 labels: | ||||||
|  |                   job_name: 'daemon-log' | ||||||
|  |                   __path__: /var/log/daemon.log | ||||||
|  |               - targets: [localhost] | ||||||
|  |                 labels: | ||||||
|  |                   job_name: 'faillog-log' | ||||||
|  |                   __path__: /var/log/faillog | ||||||
|  |               - targets: [localhost] | ||||||
|  |                 labels: | ||||||
|  |                   job_name: 'lastlog-log' | ||||||
|  |                   __path__: /var/log/lastlog | ||||||
|  |  | ||||||
|  |             relabel_configs: | ||||||
|  |               - target_label: hostname | ||||||
|  |                 replacement: "${HOSTNAME}" | ||||||
|  |               - target_label: job | ||||||
|  |                 replacement: log-files | ||||||
|  |               - target_label: node | ||||||
|  |                 replacement: "${HOSTNAME}" | ||||||
|  |  | ||||||
|  |             pipeline_stages: | ||||||
|  |               - timestamp: | ||||||
|  |                   format: RFC3339 | ||||||
|  |                   source: time | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |           # Container logs, with targets discovered from the Kubernetes API. | ||||||
|  |           - job_name: pod-logs | ||||||
|  |             kubernetes_sd_configs: | ||||||
|  |               - role: pod | ||||||
|  |  | ||||||
|  |             pipeline_stages: | ||||||
|  |               - cri: {} | ||||||
|  |  | ||||||
|  |               # Capture a log level keyword into the `level` named group. | ||||||
|  |               - regex: | ||||||
|  |                   expression: '(\s|\t|level=)?(?P<level>trace|debug|info|warn|error|TRACE|DEBUG|INFO|WARN|ERROR)(\s|\t)' | ||||||
|  |  | ||||||
|  |               - labels: | ||||||
|  |                   level: | ||||||
|  |  | ||||||
|  |             relabel_configs: | ||||||
|  |               - source_labels: | ||||||
|  |                   - __meta_kubernetes_pod_node_name | ||||||
|  |                 target_label: __host__ | ||||||
|  |               - action: labelmap | ||||||
|  |                 regex: __meta_kubernetes_pod_label_(.+) | ||||||
|  |               - action: replace | ||||||
|  |                 source_labels: | ||||||
|  |                   - __meta_kubernetes_namespace | ||||||
|  |                 target_label: namespace | ||||||
|  |               - target_label: job | ||||||
|  |                 replacement: kubernetes_sd | ||||||
|  |               - action: replace | ||||||
|  |                 source_labels: | ||||||
|  |                   - __meta_kubernetes_pod_name | ||||||
|  |                 target_label: pod | ||||||
|  |               - action: replace | ||||||
|  |                 source_labels: | ||||||
|  |                   - __meta_kubernetes_pod_container_name | ||||||
|  |                 target_label: container | ||||||
|  |               # Build the log file glob from "<pod uid>/<container name>". | ||||||
|  |               # NOTE(review): the agent is started with --config.expand-env=true; | ||||||
|  |               # confirm that `$1` below is not consumed by env expansion. | ||||||
|  |               - replacement: /var/log/pods/*$1/*.log | ||||||
|  |                 separator: / | ||||||
|  |                 source_labels: | ||||||
|  |                   - __meta_kubernetes_pod_uid | ||||||
|  |                   - __meta_kubernetes_pod_container_name | ||||||
|  |                 target_label: __path__ | ||||||
|  |               - target_label: "job_name" | ||||||
|  |                 replacement: "kubernetes-logs" | ||||||
|  |               - target_label: hostname | ||||||
|  |                 replacement: "${HOSTNAME}" | ||||||
|  |               - target_label: node | ||||||
|  |                 source_labels: | ||||||
|  |                   - __meta_kubernetes_pod_node_name | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     integrations: | ||||||
|  |  | ||||||
|  |       node_exporter: | ||||||
|  |         enabled: true | ||||||
|  |         # Host filesystems as mounted into the agent container by the DaemonSet. | ||||||
|  |         rootfs_path: /host/root | ||||||
|  |         sysfs_path: /host/sys | ||||||
|  |         procfs_path: /host/proc | ||||||
|  |         udev_data_path: /host/root/run/udev/data | ||||||
|  |  | ||||||
|  |         # collector.filesystem.ignored-mount-points: ^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+|/run/containerd/io.containerd.+)($|/) | ||||||
|  |         # Alternatives are relative to the leading `^/`, so none of them may | ||||||
|  |         # start with a slash of its own (that would require `//...` to match). | ||||||
|  |         filesystem_mount_points_exclude: "^/(dev|proc|sys|var/lib/docker/.+|run/containerd/io.containerd.+)($|/)" | ||||||
|  |         #filesystem_fs_types_exclude: "^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|shm|squashfs|sysfs|tracefs)$" | ||||||
|  |         # Same list as the reference line above, plus tmpfs and without shm. | ||||||
|  |         filesystem_fs_types_exclude: "^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|tmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$" | ||||||
|  |  | ||||||
|  |  | ||||||
|  |         # Skip virtual/per-pod network interfaces. | ||||||
|  |         netclass_ignored_devices: "^(veth.*|cali.*|[a-f0-9]{15})$" | ||||||
|  |         netdev_device_exclude: "^(veth.*|cali.*|[a-f0-9]{15})$" | ||||||
|  |  | ||||||
|  |         scrape_integration: true | ||||||
|  |  | ||||||
|  |         include_exporter_metrics: true | ||||||
|  |         enable_collectors: | ||||||
|  |           - uname | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   syslog_server.yaml: | | ||||||
|  |     # REF: https://grafana.com/docs/loki/latest/send-data/promtail/configuration/#example-syslog-config | ||||||
|  |     server: | ||||||
|  |       http_listen_port: 9080 | ||||||
|  |       grpc_listen_port: 0 | ||||||
|  |  | ||||||
|  |     positions: | ||||||
|  |       filename: /tmp/syslog_server_positions.yaml | ||||||
|  |  | ||||||
|  |     clients: | ||||||
|  |       - url: http://{{ .Values.nfc_monitoring.loki.service_name }}.{{ .Values.nfc_monitoring.loki.namespace }}.svc.{{ .Values.nfc_monitoring.kubernetes.cluster_dns_name }}:{{ .Values.nfc_monitoring.loki.service_port }}/loki/api/v1/push | ||||||
|  |  | ||||||
|  |     scrape_configs: | ||||||
|  |       # Listen for syslog on port 1514 on all interfaces; the sender's | ||||||
|  |       # hostname is copied into the `host` label. | ||||||
|  |       - job_name: syslog | ||||||
|  |         syslog: | ||||||
|  |           listen_address: 0.0.0.0:1514 | ||||||
|  |           labels: | ||||||
|  |             job: "syslog" | ||||||
|  |         relabel_configs: | ||||||
|  |           - source_labels: ['__syslog_message_hostname'] | ||||||
|  |             target_label: 'host' | ||||||
							
								
								
									
										31
									
								
								templates/ConfigMap-GrafanaProvisioning.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								templates/ConfigMap-GrafanaProvisioning.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,31 @@ | |||||||
|  | {{ if .Values.nfc_monitoring.additions.dashboard_sidecar.enabled -}} | ||||||
|  | --- | ||||||
|  | # Grafana dashboard provisioning config. | ||||||
|  | # Rendered only when `nfc_monitoring.additions.dashboard_sidecar.enabled` is set. | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: ConfigMap | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: graphing | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: grafana | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: provisioning-config | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}" | ||||||
|  | data: | ||||||
|  |   # Declares a single file-based dashboard provider that reads from | ||||||
|  |   # /var/lib/grafana/dashboards, re-checks every 10 seconds, derives folders | ||||||
|  |   # from the directory structure, and disallows UI updates. | ||||||
|  |   provisioning.yaml: |- | ||||||
|  |     apiVersion: 1 | ||||||
|  |     providers: | ||||||
|  |       - name: 'configmap-dashboard-provider' | ||||||
|  |         orgId: 1 | ||||||
|  |         folder: '' | ||||||
|  |         folderUid: '' | ||||||
|  |         type: file | ||||||
|  |         disableDeletion: false | ||||||
|  |         updateIntervalSeconds: 10 | ||||||
|  |         allowUiUpdates: false | ||||||
|  |         options: | ||||||
|  |           path: /var/lib/grafana/dashboards | ||||||
|  |           foldersFromFilesStructure: true | ||||||
|  | {{- end }} | ||||||
							
								
								
									
										72
									
								
								templates/ConfigMap-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								templates/ConfigMap-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,72 @@ | |||||||
|  | --- | ||||||
|  | # ConfigMap with the prometheus-adapter `config.yaml`. | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: ConfigMap | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: adapter-config | ||||||
|  |   # NOTE(review): namespace is hard-coded here, unlike the templates that read | ||||||
|  |   # it from `.Values.nfc_monitoring.*.namespace` - confirm this is intended. | ||||||
|  |   namespace: monitoring | ||||||
|  | data: | ||||||
|  |   # `resourceRules` defines the CPU and memory queries, each with a container | ||||||
|  |   # query and a node query, plus label-to-resource overrides; `window` is 5m. | ||||||
|  |   # The `<<.GroupBy>>` / `<<.LabelMatchers>>` placeholders use `<< >>` rather | ||||||
|  |   # than the `{{ }}` Helm actions used elsewhere in this file, so they are | ||||||
|  |   # presumably left for prometheus-adapter to expand - TODO confirm. | ||||||
|  |   config.yaml: |- | ||||||
|  |     "resourceRules": | ||||||
|  |       "cpu": | ||||||
|  |         "containerLabel": "container" | ||||||
|  |         "containerQuery": | | ||||||
|  |           sum by (<<.GroupBy>>) ( | ||||||
|  |             irate ( | ||||||
|  |                 container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[120s] | ||||||
|  |             ) | ||||||
|  |           ) | ||||||
|  |         "nodeQuery": | | ||||||
|  |           sum by (<<.GroupBy>>) ( | ||||||
|  |             1 - irate( | ||||||
|  |               node_cpu_seconds_total{mode="idle"}[60s] | ||||||
|  |             ) | ||||||
|  |             * on(namespace, pod) group_left(node) ( | ||||||
|  |               node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>} | ||||||
|  |             ) | ||||||
|  |           ) | ||||||
|  |           or sum by (<<.GroupBy>>) ( | ||||||
|  |             1 - irate( | ||||||
|  |               windows_cpu_time_total{mode="idle", job="windows-exporter",<<.LabelMatchers>>}[4m] | ||||||
|  |             ) | ||||||
|  |           ) | ||||||
|  |         "resources": | ||||||
|  |           "overrides": | ||||||
|  |             "namespace": | ||||||
|  |               "resource": "namespace" | ||||||
|  |             "node": | ||||||
|  |               "resource": "node" | ||||||
|  |             "pod": | ||||||
|  |               "resource": "pod" | ||||||
|  |       "memory": | ||||||
|  |         "containerLabel": "container" | ||||||
|  |         "containerQuery": | | ||||||
|  |           sum by (<<.GroupBy>>) ( | ||||||
|  |             container_memory_working_set_bytes{<<.LabelMatchers>>,container!="",pod!=""} | ||||||
|  |           ) | ||||||
|  |         "nodeQuery": | | ||||||
|  |           sum by (<<.GroupBy>>) ( | ||||||
|  |             node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} | ||||||
|  |             - | ||||||
|  |             node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>} | ||||||
|  |           ) | ||||||
|  |           or sum by (<<.GroupBy>>) ( | ||||||
|  |             windows_cs_physical_memory_bytes{job="windows-exporter",<<.LabelMatchers>>} | ||||||
|  |             - | ||||||
|  |             windows_memory_available_bytes{job="windows-exporter",<<.LabelMatchers>>} | ||||||
|  |           ) | ||||||
|  |         "resources": | ||||||
|  |           "overrides": | ||||||
|  |             "instance": | ||||||
|  |               "resource": "node" | ||||||
|  |             "namespace": | ||||||
|  |               "resource": "namespace" | ||||||
|  |             "pod": | ||||||
|  |               "resource": "pod" | ||||||
|  |       "window": "5m" | ||||||
							
								
								
									
										138
									
								
								templates/Daemonset-GrafanaAgent.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										138
									
								
								templates/Daemonset-GrafanaAgent.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,138 @@ | |||||||
|  | --- | ||||||
|  | # Runs one Grafana Agent pod per Linux node, with host network/PID access and | ||||||
|  | # the host's /sys, /proc, / and /var/log mounted read-only. | ||||||
|  | apiVersion: apps/v1 | ||||||
|  | kind: DaemonSet | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/name: grafana-agent | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     metricsJob: node-exporter | ||||||
|  |     cadvisormetricsJob: cadvisor | ||||||
|  |     nodeExportermetricsJob: node | ||||||
|  |   name: grafana-agent | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.grafana_agent.namespace }}" | ||||||
|  | spec: | ||||||
|  |   selector: | ||||||
|  |     # The selector is immutable after creation, so the chart version label is | ||||||
|  |     # deliberately left out: including it makes every chart version bump fail | ||||||
|  |     # on upgrade. The pod template below still carries the version label. | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/component: exporter | ||||||
|  |       app.kubernetes.io/name: grafana-agent | ||||||
|  |       app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |       app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |       metricsJob: node-exporter | ||||||
|  |       cadvisormetricsJob: cadvisor | ||||||
|  |       nodeExportermetricsJob: node | ||||||
|  |   template: | ||||||
|  |     metadata: | ||||||
|  |       labels: | ||||||
|  |         app.kubernetes.io/instance: k8s | ||||||
|  |         app.kubernetes.io/component: exporter | ||||||
|  |         app.kubernetes.io/name: grafana-agent | ||||||
|  |         app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |         app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |         app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |         metricsJob: node-exporter | ||||||
|  |         cadvisormetricsJob: cadvisor | ||||||
|  |         nodeExportermetricsJob: node | ||||||
|  |     spec: | ||||||
|  |       automountServiceAccountToken: true | ||||||
|  |       containers: | ||||||
|  |       - args: | ||||||
|  |         - --server.http.address=0.0.0.0:12345 | ||||||
|  |         # agent.yaml comes from the `grafana-agent` ConfigMap mounted below. | ||||||
|  |         - --config.file=/etc/agent/agent.yaml | ||||||
|  |         # Lets the config reference environment variables such as ${HOSTNAME}. | ||||||
|  |         - --config.expand-env=true | ||||||
|  |         name: grafana-agent | ||||||
|  |         image: "{{ .Values.nfc_monitoring.grafana_agent.image.name }}:{{ .Values.nfc_monitoring.grafana_agent.image.tag }}" | ||||||
|  |         #imagePullPolicy: Never | ||||||
|  |         ports: | ||||||
|  |         - containerPort: 12345 | ||||||
|  |           name: grafana-metrics | ||||||
|  |           protocol: TCP | ||||||
|  |         resources: | ||||||
|  |           limits: | ||||||
|  |             cpu: 1000m | ||||||
|  |             memory: 180Mi | ||||||
|  |           requests: | ||||||
|  |             cpu: 40m | ||||||
|  |             memory: 180Mi | ||||||
|  |         securityContext: | ||||||
|  |           capabilities: | ||||||
|  |             add: | ||||||
|  |             - SYS_TIME | ||||||
|  |             # drop: | ||||||
|  |             # - ALL | ||||||
|  |           readOnlyRootFilesystem: false | ||||||
|  |           privileged: true | ||||||
|  |         volumeMounts: | ||||||
|  |         - mountPath: /host/sys | ||||||
|  |           mountPropagation: HostToContainer | ||||||
|  |           name: sys | ||||||
|  |           readOnly: true | ||||||
|  |         - mountPath: /host/proc | ||||||
|  |           mountPropagation: HostToContainer | ||||||
|  |           name: proc | ||||||
|  |           readOnly: true | ||||||
|  |         - mountPath: /host/root | ||||||
|  |           mountPropagation: HostToContainer | ||||||
|  |           name: rootfs | ||||||
|  |           readOnly: true | ||||||
|  |         - mountPath: /var/log | ||||||
|  |           mountPropagation: HostToContainer | ||||||
|  |           name: logs | ||||||
|  |           readOnly: true | ||||||
|  |         - name: config | ||||||
|  |           mountPath: "/etc/agent" | ||||||
|  |           readOnly: false | ||||||
|  |         - name: temp | ||||||
|  |           mountPath: "/tmp" | ||||||
|  |           readOnly: false | ||||||
|  |         - name: agent-data | ||||||
|  |           mountPath: "/etc/agent/data" | ||||||
|  |           readOnly: false | ||||||
|  |       dnsPolicy: ClusterFirst | ||||||
|  |       volumes: | ||||||
|  |       - hostPath: | ||||||
|  |           path: /sys | ||||||
|  |         name: sys | ||||||
|  |       - hostPath: | ||||||
|  |           path: /proc | ||||||
|  |         name: proc | ||||||
|  |       - hostPath: | ||||||
|  |           path: / | ||||||
|  |         name: rootfs | ||||||
|  |       - hostPath: | ||||||
|  |           path: /var/log | ||||||
|  |         name: logs | ||||||
|  |       # Only agent.yaml is projected from the ConfigMap. | ||||||
|  |       - name: config | ||||||
|  |         configMap: | ||||||
|  |           name: grafana-agent | ||||||
|  |           items: | ||||||
|  |             - key: "agent.yaml" | ||||||
|  |               path: "agent.yaml" | ||||||
|  |       - name: temp | ||||||
|  |         emptyDir: {} | ||||||
|  |       - name: agent-data | ||||||
|  |         emptyDir: {} | ||||||
|  |  | ||||||
|  |       # The three volumes below are declared but not mounted by the container | ||||||
|  |       # defined in this manifest. | ||||||
|  |       - name: var-run | ||||||
|  |         hostPath: | ||||||
|  |           path: /var/run | ||||||
|  |       - name: containerd | ||||||
|  |         hostPath: | ||||||
|  |           path: /var/lib/containerd | ||||||
|  |       - name: disk | ||||||
|  |         hostPath: | ||||||
|  |           path: /dev/disk | ||||||
|  |  | ||||||
|  |       nodeSelector: | ||||||
|  |         kubernetes.io/os: linux | ||||||
|  |       hostNetwork: true | ||||||
|  |       hostPID: true | ||||||
|  |       priorityClassName: system-cluster-critical | ||||||
|  |       serviceAccountName: grafana-agent | ||||||
|  |       # Tolerate every taint so the agent is scheduled on all nodes. | ||||||
|  |       tolerations: | ||||||
|  |       - operator: Exists | ||||||
							
								
								
									
										136
									
								
								templates/Daemonset-kube-monitor-proxy.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								templates/Daemonset-kube-monitor-proxy.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,136 @@ | |||||||
|  | --- | ||||||
|  | # Runs two kube-rbac-proxy containers on every control-plane node. Each one | ||||||
|  | # listens on the pod IP and forwards to a loopback-only upstream: | ||||||
|  | #   11257 -> https://127.0.0.1:10257/   (container kube-rbac-proxy-kube-ctrl-mgr) | ||||||
|  | #   11259 -> https://127.0.0.1:10259/   (container kube-rbac-proxy-kube-scheduler) | ||||||
|  | apiVersion: apps/v1 | ||||||
|  | kind: DaemonSet | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: proxy | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: kube-monitor-proxy | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |  | ||||||
|  |     nodeExportermetricsJob: node | ||||||
|  |     name_kcm: kube-controller-manager | ||||||
|  |  | ||||||
|  |   name: kube-monitor-proxy | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.kube_monitor_proxy.namespace }}" | ||||||
|  | spec: | ||||||
|  |   selector: | ||||||
|  |     # The selector is immutable after creation, so the chart version label is | ||||||
|  |     # deliberately left out: including it makes every chart version bump fail | ||||||
|  |     # on upgrade. The pod template below still carries the version label. | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/component: proxy | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: kube-monitor-proxy | ||||||
|  |       app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |       app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |   template: | ||||||
|  |     metadata: | ||||||
|  |       labels: | ||||||
|  |         app.kubernetes.io/component: proxy | ||||||
|  |         app.kubernetes.io/instance: k8s | ||||||
|  |         app.kubernetes.io/name: kube-monitor-proxy | ||||||
|  |         app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |         app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |         app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     spec: | ||||||
|  |       automountServiceAccountToken: true | ||||||
|  |       # Host networking is what makes the 127.0.0.1 upstreams reachable. | ||||||
|  |       hostNetwork: true | ||||||
|  |       # Schedule only onto nodes labelled as control-plane. | ||||||
|  |       affinity: | ||||||
|  |         nodeAffinity: | ||||||
|  |           requiredDuringSchedulingIgnoredDuringExecution: | ||||||
|  |             nodeSelectorTerms: | ||||||
|  |             - matchExpressions: | ||||||
|  |               - key: node-role.kubernetes.io/control-plane | ||||||
|  |                 operator: Exists | ||||||
|  |       containers: | ||||||
|  |  | ||||||
|  |       - args: | ||||||
|  |         - --logtostderr | ||||||
|  |         # NOTE(review): verbosity 10 looks like a debugging level - confirm it | ||||||
|  |         # is wanted in a release. | ||||||
|  |         - --v=10 | ||||||
|  |         - --secure-listen-address=[$(IP)]:11257 | ||||||
|  |         - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 | ||||||
|  |         - --upstream=https://127.0.0.1:10257/ | ||||||
|  |         - --client-ca-file=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt | ||||||
|  |         - --upstream-ca-file=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt | ||||||
|  |         env: | ||||||
|  |         # NODE-IP is defined but not referenced by the args above. | ||||||
|  |         - name: NODE-IP | ||||||
|  |           valueFrom: | ||||||
|  |             fieldRef: | ||||||
|  |               apiVersion: v1 | ||||||
|  |               fieldPath: status.hostIP | ||||||
|  |         # IP is substituted into --secure-listen-address. | ||||||
|  |         - name: IP | ||||||
|  |           valueFrom: | ||||||
|  |             fieldRef: | ||||||
|  |               fieldPath: status.podIP | ||||||
|  |         image: "{{ .Values.nfc_monitoring.kube_rbac_proxy.image.name }}:{{ .Values.nfc_monitoring.kube_rbac_proxy.image.tag }}" | ||||||
|  |         name: kube-rbac-proxy-kube-ctrl-mgr | ||||||
|  |         ports: | ||||||
|  |         # Must be the port the proxy listens on (see --secure-listen-address), | ||||||
|  |         # not the upstream port, so the named port resolves to the proxy. | ||||||
|  |         - containerPort: 11257 | ||||||
|  |           #hostPort: 9100 | ||||||
|  |           name: kube-ctrl-mgr | ||||||
|  |         resources: | ||||||
|  |           limits: | ||||||
|  |             cpu: 200m | ||||||
|  |             memory: 256Mi | ||||||
|  |           requests: | ||||||
|  |             cpu: 10m | ||||||
|  |             memory: 20Mi | ||||||
|  |         securityContext: | ||||||
|  |           allowPrivilegeEscalation: false | ||||||
|  |           capabilities: | ||||||
|  |             drop: | ||||||
|  |             - ALL | ||||||
|  |           readOnlyRootFilesystem: true | ||||||
|  |           runAsGroup: 65532 | ||||||
|  |           runAsNonRoot: true | ||||||
|  |           runAsUser: 65532 | ||||||
|  |  | ||||||
|  |  | ||||||
|  |       - args: | ||||||
|  |         - --logtostderr | ||||||
|  |         - --v=10 | ||||||
|  |         - --secure-listen-address=[$(IP)]:11259 | ||||||
|  |         - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 | ||||||
|  |         - --upstream=https://127.0.0.1:10259/ | ||||||
|  |         - --client-ca-file=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt | ||||||
|  |         - --upstream-ca-file=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt | ||||||
|  |         env: | ||||||
|  |         # NODE-IP is defined but not referenced by the args above. | ||||||
|  |         - name: NODE-IP | ||||||
|  |           valueFrom: | ||||||
|  |             fieldRef: | ||||||
|  |               apiVersion: v1 | ||||||
|  |               fieldPath: status.hostIP | ||||||
|  |         - name: IP | ||||||
|  |           valueFrom: | ||||||
|  |             fieldRef: | ||||||
|  |               fieldPath: status.podIP | ||||||
|  |         image: "{{ .Values.nfc_monitoring.kube_rbac_proxy.image.name }}:{{ .Values.nfc_monitoring.kube_rbac_proxy.image.tag }}" | ||||||
|  |         name: kube-rbac-proxy-kube-scheduler | ||||||
|  |         ports: | ||||||
|  |         # Proxy listen port, matching --secure-listen-address above. | ||||||
|  |         - containerPort: 11259 | ||||||
|  |           #hostPort: 9100 | ||||||
|  |           name: kube-scheduler | ||||||
|  |         resources: | ||||||
|  |           limits: | ||||||
|  |             cpu: 200m | ||||||
|  |             memory: 256Mi | ||||||
|  |           requests: | ||||||
|  |             cpu: 10m | ||||||
|  |             memory: 20Mi | ||||||
|  |         securityContext: | ||||||
|  |           allowPrivilegeEscalation: false | ||||||
|  |           capabilities: | ||||||
|  |             drop: | ||||||
|  |             - ALL | ||||||
|  |           readOnlyRootFilesystem: true | ||||||
|  |           runAsGroup: 65532 | ||||||
|  |           runAsNonRoot: true | ||||||
|  |           runAsUser: 65532 | ||||||
|  |       nodeSelector: | ||||||
|  |         kubernetes.io/os: linux | ||||||
|  |       hostPID: true | ||||||
|  |       priorityClassName: system-cluster-critical | ||||||
|  |       serviceAccountName: kube-monitor-proxy | ||||||
|  |       tolerations: | ||||||
|  |       - operator: Exists | ||||||
							
								
								
									
										112
									
								
								templates/Deployment-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										112
									
								
								templates/Deployment-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,112 @@ | |||||||
|  | --- | ||||||
|  | # kube-state-metrics exporter Deployment. | ||||||
|  | # The exporter container binds only to 127.0.0.1 (8081 metrics, 8082 telemetry); | ||||||
|  | # the two kube-rbac-proxy sidecars publish those endpoints on 8443 and 9443. | ||||||
|  | apiVersion: apps/v1 | ||||||
|  | kind: Deployment | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: kube-state-metrics | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: kube-state-metrics | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.kube_state_metrics.namespace }}" | ||||||
|  | spec: | ||||||
|  |   replicas: 1 | ||||||
|  |   # The selector intentionally carries no version label: it is immutable. | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/component: exporter | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: kube-state-metrics | ||||||
|  |       app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |       app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |   template: | ||||||
|  |     metadata: | ||||||
|  |       annotations: | ||||||
|  |         kubectl.kubernetes.io/default-container: kube-state-metrics | ||||||
|  |       labels: | ||||||
|  |         app.kubernetes.io/component: exporter | ||||||
|  |         app.kubernetes.io/instance: k8s | ||||||
|  |         app.kubernetes.io/name: kube-state-metrics | ||||||
|  |         app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |         app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |         app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     spec: | ||||||
|  |       automountServiceAccountToken: true | ||||||
|  |       containers: | ||||||
|  |       # Exporter: loopback only, reached through the proxies below. | ||||||
|  |       - args: | ||||||
|  |         - --host=127.0.0.1 | ||||||
|  |         - --port=8081 | ||||||
|  |         - --telemetry-host=127.0.0.1 | ||||||
|  |         - --telemetry-port=8082 | ||||||
|  |         image: "{{ .Values.nfc_monitoring.kube_state_metrics.image.name }}:{{ .Values.nfc_monitoring.kube_state_metrics.image.tag }}" | ||||||
|  |         name: kube-state-metrics | ||||||
|  |         resources: | ||||||
|  |           limits: | ||||||
|  |             cpu: 200m | ||||||
|  |             memory: 250Mi | ||||||
|  |           requests: | ||||||
|  |             cpu: 10m | ||||||
|  |             memory: 190Mi | ||||||
|  |         securityContext: | ||||||
|  |           allowPrivilegeEscalation: false | ||||||
|  |           capabilities: | ||||||
|  |             drop: | ||||||
|  |             - ALL | ||||||
|  |           readOnlyRootFilesystem: true | ||||||
|  |           runAsUser: 65534 | ||||||
|  |       # Proxy for the main metrics endpoint (8443 -> 127.0.0.1:8081). | ||||||
|  |       - args: | ||||||
|  |         - --logtostderr | ||||||
|  |         - --secure-listen-address=:8443 | ||||||
|  |         - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 | ||||||
|  |         - --upstream=http://127.0.0.1:8081/ | ||||||
|  |         # Image comes from the shared chart value so every kube-rbac-proxy in the chart is versioned in one place. | ||||||
|  |         image: "{{ .Values.nfc_monitoring.kube_rbac_proxy.image.name }}:{{ .Values.nfc_monitoring.kube_rbac_proxy.image.tag }}" | ||||||
|  |         name: kube-rbac-proxy-main | ||||||
|  |         ports: | ||||||
|  |         - containerPort: 8443 | ||||||
|  |           name: https-main | ||||||
|  |         resources: | ||||||
|  |           limits: | ||||||
|  |             cpu: 100m | ||||||
|  |             memory: 40Mi | ||||||
|  |           requests: | ||||||
|  |             cpu: 50m | ||||||
|  |             memory: 20Mi | ||||||
|  |         securityContext: | ||||||
|  |           allowPrivilegeEscalation: false | ||||||
|  |           capabilities: | ||||||
|  |             drop: | ||||||
|  |             - ALL | ||||||
|  |           readOnlyRootFilesystem: true | ||||||
|  |           runAsGroup: 65532 | ||||||
|  |           runAsNonRoot: true | ||||||
|  |           runAsUser: 65532 | ||||||
|  |       # Proxy for the exporter's own telemetry endpoint (9443 -> 127.0.0.1:8082). | ||||||
|  |       - args: | ||||||
|  |         - --logtostderr | ||||||
|  |         - --secure-listen-address=:9443 | ||||||
|  |         - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 | ||||||
|  |         - --upstream=http://127.0.0.1:8082/ | ||||||
|  |         image: "{{ .Values.nfc_monitoring.kube_rbac_proxy.image.name }}:{{ .Values.nfc_monitoring.kube_rbac_proxy.image.tag }}" | ||||||
|  |         name: kube-rbac-proxy-self | ||||||
|  |         ports: | ||||||
|  |         - containerPort: 9443 | ||||||
|  |           name: https-self | ||||||
|  |         resources: | ||||||
|  |           limits: | ||||||
|  |             cpu: 20m | ||||||
|  |             memory: 40Mi | ||||||
|  |           requests: | ||||||
|  |             cpu: 10m | ||||||
|  |             memory: 20Mi | ||||||
|  |         securityContext: | ||||||
|  |           allowPrivilegeEscalation: false | ||||||
|  |           capabilities: | ||||||
|  |             drop: | ||||||
|  |             - ALL | ||||||
|  |           readOnlyRootFilesystem: true | ||||||
|  |           runAsGroup: 65532 | ||||||
|  |           runAsNonRoot: true | ||||||
|  |           runAsUser: 65532 | ||||||
|  |       nodeSelector: | ||||||
|  |         kubernetes.io/os: linux | ||||||
|  |       serviceAccountName: kube-state-metrics | ||||||
							
								
								
									
										102
									
								
								templates/Deployment-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										102
									
								
								templates/Deployment-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,102 @@ | |||||||
|  | --- | ||||||
|  | # prometheus-adapter Deployment. | ||||||
|  | # Runs two replicas serving on 6443; the adapter reads its rules from the | ||||||
|  | # adapter-config ConfigMap mounted at /etc/adapter. | ||||||
|  | apiVersion: apps/v1 | ||||||
|  | kind: Deployment | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: prometheus-adapter | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.prometheus_adaptor.namespace }}" | ||||||
|  | spec: | ||||||
|  |   replicas: 2 | ||||||
|  |   # spec.selector is immutable once the Deployment exists, so it must not | ||||||
|  |   # contain the chart version: a label that changes on every release would | ||||||
|  |   # make each chart upgrade fail. The version stays on the metadata labels only. | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/component: metrics-adapter | ||||||
|  |       app.kubernetes.io/instance: main | ||||||
|  |       app.kubernetes.io/name: prometheus-adapter | ||||||
|  |       app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |       app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |   strategy: | ||||||
|  |     rollingUpdate: | ||||||
|  |       maxSurge: 1 | ||||||
|  |       maxUnavailable: 1 | ||||||
|  |   template: | ||||||
|  |     metadata: | ||||||
|  |       labels: | ||||||
|  |         app.kubernetes.io/component: metrics-adapter | ||||||
|  |         app.kubernetes.io/instance: main | ||||||
|  |         app.kubernetes.io/name: prometheus-adapter | ||||||
|  |         app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |         app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |         app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     spec: | ||||||
|  |       affinity: | ||||||
|  |         {{- toYaml .Values.nfc_monitoring.prometheus_adaptor.affinity | nindent 8 }} | ||||||
|  |       automountServiceAccountToken: true | ||||||
|  |       containers: | ||||||
|  |       - args: | ||||||
|  |         - --cert-dir=/var/run/serving-cert | ||||||
|  |         - --config=/etc/adapter/config.yaml | ||||||
|  |         - --metrics-relist-interval=1m | ||||||
|  |         # NOTE(review): the namespace in this URL is fixed to "monitoring" - confirm it matches where Prometheus is deployed. | ||||||
|  |         - --prometheus-url=https://prometheus.monitoring.svc:9090/ | ||||||
|  |         - --secure-port=6443 | ||||||
|  |         - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA | ||||||
|  |         image: "{{ .Values.nfc_monitoring.prometheus_adaptor.image.name }}:{{ .Values.nfc_monitoring.prometheus_adaptor.image.tag }}" | ||||||
|  |         livenessProbe: | ||||||
|  |           failureThreshold: 5 | ||||||
|  |           httpGet: | ||||||
|  |             path: /livez | ||||||
|  |             port: https | ||||||
|  |             scheme: HTTPS | ||||||
|  |           initialDelaySeconds: 30 | ||||||
|  |           periodSeconds: 5 | ||||||
|  |         name: prometheus-adapter | ||||||
|  |         ports: | ||||||
|  |         - containerPort: 6443 | ||||||
|  |           name: https | ||||||
|  |         readinessProbe: | ||||||
|  |           failureThreshold: 5 | ||||||
|  |           httpGet: | ||||||
|  |             path: /readyz | ||||||
|  |             port: https | ||||||
|  |             scheme: HTTPS | ||||||
|  |           initialDelaySeconds: 30 | ||||||
|  |           periodSeconds: 5 | ||||||
|  |         resources: | ||||||
|  |           requests: | ||||||
|  |             cpu: 102m | ||||||
|  |             memory: 180Mi | ||||||
|  |         securityContext: | ||||||
|  |           allowPrivilegeEscalation: false | ||||||
|  |           capabilities: | ||||||
|  |             drop: | ||||||
|  |             - ALL | ||||||
|  |           readOnlyRootFilesystem: true | ||||||
|  |         terminationMessagePolicy: FallbackToLogsOnError | ||||||
|  |         # The root filesystem is read-only, so writable paths are emptyDir mounts. | ||||||
|  |         volumeMounts: | ||||||
|  |         - mountPath: /tmp | ||||||
|  |           name: tmpfs | ||||||
|  |           readOnly: false | ||||||
|  |         - mountPath: /var/run/serving-cert | ||||||
|  |           name: volume-serving-cert | ||||||
|  |           readOnly: false | ||||||
|  |         - mountPath: /etc/adapter | ||||||
|  |           name: config | ||||||
|  |           readOnly: false | ||||||
|  |       nodeSelector: | ||||||
|  |         kubernetes.io/os: linux | ||||||
|  |       securityContext: {} | ||||||
|  |       serviceAccountName: prometheus-adapter | ||||||
|  |       volumes: | ||||||
|  |       - emptyDir: {} | ||||||
|  |         name: tmpfs | ||||||
|  |       - emptyDir: {} | ||||||
|  |         name: volume-serving-cert | ||||||
|  |       - configMap: | ||||||
|  |           name: adapter-config | ||||||
|  |         name: config | ||||||
							
								
								
									
										163
									
								
								templates/Grafana-Grafana.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										163
									
								
								templates/Grafana-Grafana.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,163 @@ | |||||||
|  | --- | ||||||
|  | # Grafana instance (grafana-operator custom resource). | ||||||
|  | # Declares the Grafana config, the Deployment override (plugins, probes, | ||||||
|  | # volumes, optional dashboard sidecar) and the PVC backing /var/lib/grafana. | ||||||
|  | apiVersion: grafana.integreatly.org/v1beta1 | ||||||
|  | kind: Grafana | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: graphing | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: grafana | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: grafana | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}" | ||||||
|  | spec: | ||||||
|  |   config: | ||||||
|  |     log: | ||||||
|  |       mode: "console" | ||||||
|  |     auth: | ||||||
|  |       disable_login_form: "false" | ||||||
|  |     # NOTE(review): the admin credentials are rendered in plain text into this resource - consider sourcing them from a Secret. | ||||||
|  |     security: | ||||||
|  |       admin_user: "{{ .Values.nfc_monitoring.grafana.admin_user }}" | ||||||
|  |       admin_password: "{{ .Values.nfc_monitoring.grafana.admin_password }}" | ||||||
|  |   deployment: | ||||||
|  |     metadata: | ||||||
|  |       labels: | ||||||
|  |         app.kubernetes.io/component: graphing | ||||||
|  |         app.kubernetes.io/instance: k8s | ||||||
|  |         app.kubernetes.io/name: grafana | ||||||
|  |         app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |         app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |         app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     spec: | ||||||
|  |       replicas: {{ .Values.nfc_monitoring.grafana.replicas | int }} | ||||||
|  |       selector: | ||||||
|  |         matchLabels: | ||||||
|  |           app.kubernetes.io/component: graphing | ||||||
|  |           app.kubernetes.io/instance: k8s | ||||||
|  |           app.kubernetes.io/name: grafana | ||||||
|  |           app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |           app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |       strategy: | ||||||
|  |         rollingUpdate: | ||||||
|  |           maxSurge: 1 | ||||||
|  |           maxUnavailable: 1 | ||||||
|  |       template: | ||||||
|  |         metadata: | ||||||
|  |           labels: | ||||||
|  |             app.kubernetes.io/component: graphing | ||||||
|  |             app.kubernetes.io/instance: k8s | ||||||
|  |             app.kubernetes.io/name: grafana | ||||||
|  |             app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |             app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |             app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |  | ||||||
|  |         spec: | ||||||
|  |           affinity: | ||||||
|  |             {{- toYaml .Values.nfc_monitoring.grafana.affinity | nindent 12 }} | ||||||
|  |           {{ if .Values.nfc_monitoring.additions.dashboard_sidecar.enabled }} | ||||||
|  |           automountServiceAccountToken: true | ||||||
|  |           {{ end }} | ||||||
|  |           containers: | ||||||
|  |           - name: grafana | ||||||
|  |             image: "{{ .Values.nfc_monitoring.grafana.image.name }}:{{ .Values.nfc_monitoring.grafana.image.tag }}" | ||||||
|  |             env: | ||||||
|  |               - name: GF_INSTALL_PLUGINS | ||||||
|  |                 value: agenty-flowcharting-panel 0.9.1, ddurieux-glpi-app 1.3.1, grafana-oncall-app | ||||||
|  |               # NOTE(review): this setting is normally a list of plugin IDs; the trailing version looks like it belongs in GF_INSTALL_PLUGINS - confirm. | ||||||
|  |               - name: GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS | ||||||
|  |                 value: grafana-oncall-app 1.1.38 | ||||||
|  |               - name: GF_ANALYTICS_REPORTING_ENABLED | ||||||
|  |                 value: 'false' | ||||||
|  |             imagePullPolicy: IfNotPresent | ||||||
|  |             ports: | ||||||
|  |               - containerPort: 3000 | ||||||
|  |                 name: grafana-http | ||||||
|  |             readinessProbe: | ||||||
|  |               failureThreshold: 3 | ||||||
|  |               httpGet: | ||||||
|  |                 path: /login | ||||||
|  |                 port: 3000 | ||||||
|  |                 scheme: HTTP | ||||||
|  |               initialDelaySeconds: 30 | ||||||
|  |               periodSeconds: 10 | ||||||
|  |               successThreshold: 1 | ||||||
|  |               timeoutSeconds: 3 | ||||||
|  |             resources: | ||||||
|  |               limits: | ||||||
|  |                 cpu: 2000m | ||||||
|  |                 memory: 1Gi | ||||||
|  |               requests: | ||||||
|  |                 cpu: 100m | ||||||
|  |                 memory: 100Mi | ||||||
|  |             securityContext: | ||||||
|  |               allowPrivilegeEscalation: false | ||||||
|  |               readOnlyRootFilesystem: false | ||||||
|  |             volumeMounts: | ||||||
|  |               - mountPath: /var/lib/grafana | ||||||
|  |                 name: grafana-storage | ||||||
|  |               - mountPath: /etc/grafana/provisioning/plugins | ||||||
|  |                 name: plugin-config | ||||||
|  |                 readOnly: false | ||||||
|  |               {{ if .Values.nfc_monitoring.additions.dashboard_sidecar.enabled }} | ||||||
|  |               - mountPath: /etc/grafana/provisioning/dashboards | ||||||
|  |                 name: provisioning-config | ||||||
|  |               - mountPath: /var/lib/grafana/dashboards | ||||||
|  |                 name: dashboards | ||||||
|  |  | ||||||
|  |           # Optional sidecar: writes labelled ConfigMaps into the shared dashboards volume. | ||||||
|  |           - image: "{{ .Values.nfc_monitoring.additions.dashboard_sidecar.image.name }}:{{ .Values.nfc_monitoring.additions.dashboard_sidecar.image.tag }}" | ||||||
|  |             name: k8s-sidecar | ||||||
|  |             env: | ||||||
|  |               - name: LABEL | ||||||
|  |                 value: "{{ .Values.nfc_monitoring.additions.dashboard_sidecar.label_name }}" | ||||||
|  |               - name: LABEL_VALUE | ||||||
|  |                 value: "{{ .Values.nfc_monitoring.additions.dashboard_sidecar.label_value }}" | ||||||
|  |               - name: FOLDER | ||||||
|  |                 value: /var/lib/grafana/dashboards | ||||||
|  |               # Follow the configured Grafana namespace instead of a fixed "grafana". | ||||||
|  |               - name: NAMESPACE | ||||||
|  |                 value: "{{ .Values.nfc_monitoring.grafana.namespace }}" | ||||||
|  |               - name: RESOURCE | ||||||
|  |                 value: configmap | ||||||
|  |             volumeMounts: | ||||||
|  |               - mountPath: /var/lib/grafana/dashboards | ||||||
|  |                 name: dashboards | ||||||
|  |           {{ end }} | ||||||
|  |           securityContext: | ||||||
|  |               fsGroup: 65534 | ||||||
|  |           volumes: | ||||||
|  |             #- name: grafana-storage | ||||||
|  |             - name: plugin-config | ||||||
|  |               configMap: | ||||||
|  |                 # Provide the name of the ConfigMap you want to mount. | ||||||
|  |                 name: grafana-config | ||||||
|  |                 # An array of keys from the ConfigMap to create as files | ||||||
|  |                 items: | ||||||
|  |                   - key: "oncall-app.yaml" | ||||||
|  |                     path: "oncall-app.yaml" | ||||||
|  |             {{ if .Values.nfc_monitoring.additions.dashboard_sidecar.enabled }} | ||||||
|  |             - name: dashboards | ||||||
|  |               emptyDir: {} | ||||||
|  |             - name: provisioning-config | ||||||
|  |               configMap: | ||||||
|  |                 name: provisioning-config | ||||||
|  |             {{ end }} | ||||||
|  |             - name: grafana-storage | ||||||
|  |               persistentVolumeClaim: | ||||||
|  |                 claimName: grafana-pvc | ||||||
|  |           serviceAccountName: grafana | ||||||
|  |           nodeSelector: | ||||||
|  |             kubernetes.io/os: linux | ||||||
|  |  | ||||||
|  |   persistentVolumeClaim: | ||||||
|  |     metadata: | ||||||
|  |       annotations: | ||||||
|  |         pv.beta.kubernetes.io/gid: "65534" | ||||||
|  |       labels: | ||||||
|  |         app.kubernetes.io/name: grafana | ||||||
|  |         app.kubernetes.io/component: graphing | ||||||
|  |         app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     spec: | ||||||
|  |       accessModes: | ||||||
|  |         - "ReadWriteMany" | ||||||
|  |       resources: | ||||||
|  |         requests: | ||||||
|  |           storage: "5Gi" | ||||||
							
								
								
									
										18
									
								
								templates/GrafanaDashboard-AlertManager.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								templates/GrafanaDashboard-AlertManager.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | |||||||
|  | --- | ||||||
|  | # Alertmanager dashboard, imported from grafana.com (dashboard id 9578). | ||||||
|  | apiVersion: grafana.integreatly.org/v1beta1 | ||||||
|  | kind: GrafanaDashboard | ||||||
|  | metadata: | ||||||
|  |   name: alertmanager | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}" | ||||||
|  | spec: | ||||||
|  |   allowCrossNamespaceImport: true | ||||||
|  |   folder: No Fuss Monitoring | ||||||
|  |   # Go-style duration: "d" is not a valid unit, so one day is written as 24h. | ||||||
|  |   resyncPeriod: 24h | ||||||
|  |   # Binds the dashboard to the Grafana instance carrying these labels. | ||||||
|  |   instanceSelector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/component: graphing | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: grafana | ||||||
|  |   grafanaCom: | ||||||
|  |     id: 9578 | ||||||
|  |     revision: 4 # as @ 19-09-23 | ||||||
							
								
								
									
										21
									
								
								templates/GrafanaDashboard-Ceph.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								templates/GrafanaDashboard-Ceph.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,21 @@ | |||||||
|  | {{- if .Values.nfc_monitoring.additions.ceph.enabled | default false }} | ||||||
|  | --- | ||||||
|  | # Ceph dashboard, imported from grafana.com (dashboard id 2842). | ||||||
|  | # Rendered only when the ceph addition is enabled. The guard sits above the | ||||||
|  | # document marker and does not trim the following newline, so the marker and | ||||||
|  | # apiVersion stay on separate lines. | ||||||
|  | apiVersion: grafana.integreatly.org/v1beta1 | ||||||
|  | kind: GrafanaDashboard | ||||||
|  | metadata: | ||||||
|  |   name: ceph | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}" | ||||||
|  | spec: | ||||||
|  |   allowCrossNamespaceImport: true | ||||||
|  |   folder: No Fuss Monitoring | ||||||
|  |   # Go-style duration: "d" is not a valid unit, so one day is written as 24h. | ||||||
|  |   resyncPeriod: 24h | ||||||
|  |   instanceSelector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/component: graphing | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: grafana | ||||||
|  |   grafanaCom: | ||||||
|  |     id: 2842 | ||||||
|  |     revision: 17 # as @ 19-09-23 | ||||||
|  | {{- end }} | ||||||
							
								
								
									
										19
									
								
								templates/GrafanaDashboard-cluster-summary.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								templates/GrafanaDashboard-cluster-summary.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,19 @@ | |||||||
|  | --- | ||||||
|  | # Cluster summary dashboard, embedded from files/dashboard-summary.json. | ||||||
|  | # Cross-namespace import is disabled, so the dashboard must be created in the | ||||||
|  | # same namespace as the Grafana instance: use the configured namespace. | ||||||
|  | apiVersion: grafana.integreatly.org/v1beta1 | ||||||
|  | kind: GrafanaDashboard | ||||||
|  | metadata: | ||||||
|  |   name: cluster-summary | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}" | ||||||
|  | spec: | ||||||
|  |   allowCrossNamespaceImport: false | ||||||
|  |   folder: No Fuss Monitoring | ||||||
|  |   resyncPeriod: 30s | ||||||
|  |   instanceSelector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/component: graphing | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: grafana | ||||||
|  |   {{- /* Parse then re-serialise the file so it is emitted as one compact JSON line. */}} | ||||||
|  |   {{- $Dashboard := .Files.Get "files/dashboard-summary.json" | fromJson }} | ||||||
|  |   json: >- | ||||||
|  |     {{ $Dashboard | toRawJson }} | ||||||
|  |  | ||||||
							
								
								
									
										18
									
								
								templates/GrafanaDashboard-node-exporter-full.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								templates/GrafanaDashboard-node-exporter-full.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | |||||||
|  | --- | ||||||
|  | # Node exporter dashboard, imported from grafana.com (dashboard id 1860). | ||||||
|  | apiVersion: grafana.integreatly.org/v1beta1 | ||||||
|  | kind: GrafanaDashboard | ||||||
|  | metadata: | ||||||
|  |   name: node-exporter | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.grafana.namespace }}" | ||||||
|  | spec: | ||||||
|  |   allowCrossNamespaceImport: true | ||||||
|  |   folder: No Fuss Monitoring | ||||||
|  |   # Go-style duration: "d" is not a valid unit, so one day is written as 24h. | ||||||
|  |   resyncPeriod: 24h | ||||||
|  |   # Binds the dashboard to the Grafana instance carrying these labels. | ||||||
|  |   instanceSelector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/component: graphing | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: grafana | ||||||
|  |   grafanaCom: | ||||||
|  |     id: 1860 | ||||||
|  |     revision: 32 # as @ 19-09-23 | ||||||
							
								
								
									
										28
									
								
								templates/GrafanaDatasources.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								templates/GrafanaDatasources.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,28 @@ | |||||||
|  | --- | ||||||
|  | # One GrafanaDatasource per entry in .Values.nfc_monitoring.grafana.DataSources. | ||||||
|  | # Each entry is copied verbatim under spec.datasource and its "name" key also | ||||||
|  | # becomes the resource name. | ||||||
|  | apiVersion: grafana.integreatly.org/v1beta1 | ||||||
|  | kind: GrafanaDatasourceList | ||||||
|  | items: | ||||||
|  |   {{- range .Values.nfc_monitoring.grafana.DataSources }} | ||||||
|  |   - apiVersion: grafana.integreatly.org/v1beta1 | ||||||
|  |     kind: GrafanaDatasource | ||||||
|  |     metadata: | ||||||
|  |       # Quoted so a numeric- or boolean-looking name still renders as a string. | ||||||
|  |       name: {{ .name | quote }} | ||||||
|  |       # "$" reaches the root context; "." is the current datasource inside range. | ||||||
|  |       namespace: "{{ $.Values.nfc_monitoring.grafana.namespace }}" | ||||||
|  |       labels: | ||||||
|  |         app.kubernetes.io/component: dashboard | ||||||
|  |         app.kubernetes.io/instance: k8s | ||||||
|  |         app.kubernetes.io/name: grafana | ||||||
|  |         app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |         app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |         app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     spec: | ||||||
|  |       instanceSelector: | ||||||
|  |         matchLabels: | ||||||
|  |           app.kubernetes.io/component: graphing | ||||||
|  |           app.kubernetes.io/instance: k8s | ||||||
|  |           app.kubernetes.io/name: grafana | ||||||
|  |       allowCrossNamespaceImport: true | ||||||
|  |       datasource: | ||||||
|  |         {{- toYaml . | nindent 8 }} | ||||||
|  |   {{- end }} | ||||||
							
								
								
									
										31
									
								
								templates/NetworkPolicy-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								templates/NetworkPolicy-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,31 @@ | |||||||
|  | # apiVersion: networking.k8s.io/v1 | ||||||
|  | # kind: NetworkPolicy | ||||||
|  | # metadata: | ||||||
|  | #   labels: | ||||||
|  | #     app.kubernetes.io/component: exporter | ||||||
|  | #     app.kubernetes.io/name: kube-state-metrics | ||||||
|  | #     app.kubernetes.io/part-of: kube-prometheus | ||||||
|  | #     app.kubernetes.io/version: 2.8.1 | ||||||
|  | #   name: kube-state-metrics | ||||||
|  | #   namespace: monitoring | ||||||
|  | # spec: | ||||||
|  | #   egress: | ||||||
|  | #   - {} | ||||||
|  | #   ingress: | ||||||
|  | #   - from: | ||||||
|  | #     - podSelector: | ||||||
|  | #         matchLabels: | ||||||
|  | #           app.kubernetes.io/name: prometheus | ||||||
|  | #     ports: | ||||||
|  | #     - port: 8443 | ||||||
|  | #       protocol: TCP | ||||||
|  | #     - port: 9443 | ||||||
|  | #       protocol: TCP | ||||||
|  | #   podSelector: | ||||||
|  | #     matchLabels: | ||||||
|  | #       app.kubernetes.io/component: exporter | ||||||
|  | #       app.kubernetes.io/name: kube-state-metrics | ||||||
|  | #       app.kubernetes.io/part-of: kube-prometheus | ||||||
|  | #   policyTypes: | ||||||
|  | #   - Egress | ||||||
|  | #   - Ingress | ||||||
							
								
								
									
										21
									
								
								templates/PodDisruptionBudget-alertmanager.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								templates/PodDisruptionBudget-alertmanager.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,21 @@ | |||||||
|  | --- | ||||||
|  | # Disruption budget for Alertmanager: at most one matching pod may be | ||||||
|  | # unavailable during a voluntary disruption. | ||||||
|  | apiVersion: policy/v1 | ||||||
|  | kind: PodDisruptionBudget | ||||||
|  | metadata: | ||||||
|  |   name: alertmanager-main | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.alert_manager.namespace | quote }} | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: alertmanager | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/component: alert-router | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  | spec: | ||||||
|  |   # Pods covered by this budget. | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/name: alertmanager | ||||||
|  |       app.kubernetes.io/instance: main | ||||||
|  |       app.kubernetes.io/component: alert-router | ||||||
|  |       app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |   maxUnavailable: 1 | ||||||
							
								
								
									
										21
									
								
								templates/PodDisruptionBudget-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								templates/PodDisruptionBudget-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,21 @@ | |||||||
|  | --- | ||||||
|  | # Disruption budget for prometheus-adapter: keep at least one matching pod | ||||||
|  | # available during a voluntary disruption. | ||||||
|  | apiVersion: policy/v1 | ||||||
|  | kind: PodDisruptionBudget | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: prometheus-adapter | ||||||
|  |   # A budget only selects pods in its own namespace, so it must follow the | ||||||
|  |   # namespace the prometheus-adapter Deployment is rendered into. | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.prometheus_adaptor.namespace }}" | ||||||
|  | spec: | ||||||
|  |   minAvailable: 1 | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/component: metrics-adapter | ||||||
|  |       app.kubernetes.io/instance: main | ||||||
|  |       app.kubernetes.io/name: prometheus-adapter | ||||||
|  |       app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
							
								
								
									
										20
									
								
								templates/PodDisruptionBudget-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								templates/PodDisruptionBudget-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,20 @@ | |||||||
|  | --- | ||||||
|  | # PodDisruptionBudget for the pods of the "k8s" Prometheus instance: at least | ||||||
|  | # one pod matching the selector below must stay available (minAvailable: 1). | ||||||
|  | apiVersion: policy/v1 | ||||||
|  | kind: PodDisruptionBudget | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |   name: prometheus-k8s | ||||||
|  |   # Quoted like the namespace of the Prometheus resource itself, so a | ||||||
|  |   # numeric- or boolean-looking namespace value still renders as a string. | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}" | ||||||
|  | spec: | ||||||
|  |   minAvailable: 1 | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/component: prometheus | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
							
								
								
									
										64
									
								
								templates/Prometheus-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								templates/Prometheus-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,64 @@ | |||||||
|  | --- | ||||||
|  | # Prometheus custom resource (monitoring.coreos.com/v1) named "k8s". | ||||||
|  | # Namespace, affinity, storage, the optional Thanos sidecar and any extra | ||||||
|  | # spec fields are read from .Values.nfc_monitoring. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: Prometheus | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |   name: k8s | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}" | ||||||
|  | spec: | ||||||
|  |   affinity: | ||||||
|  |     {{- toYaml .Values.nfc_monitoring.prometheus.affinity | nindent 4 }} | ||||||
|  |   # Alerts are sent to the alertmanager-main endpoint in the Alertmanager | ||||||
|  |   # namespace configured in values. | ||||||
|  |   alerting: | ||||||
|  |     alertmanagers: | ||||||
|  |     - apiVersion: v2 | ||||||
|  |       name: alertmanager-main | ||||||
|  |       namespace: "{{ .Values.nfc_monitoring.alert_manager.namespace }}" | ||||||
|  |       port: web | ||||||
|  |   enableFeatures: [] | ||||||
|  |   externalLabels: {} | ||||||
|  |   image: quay.io/prometheus/prometheus:v2.42.0 | ||||||
|  |   nodeSelector: | ||||||
|  |     kubernetes.io/os: linux | ||||||
|  |   podMetadata: | ||||||
|  |     labels: | ||||||
|  |       app.kubernetes.io/component: prometheus | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: prometheus | ||||||
|  |       app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |       app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |       app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |   podMonitorNamespaceSelector: {} | ||||||
|  |   podMonitorSelector: {} | ||||||
|  |   probeNamespaceSelector: {} | ||||||
|  |   probeSelector: {} | ||||||
|  |   replicas: 3 | ||||||
|  |   resources: | ||||||
|  |     requests: | ||||||
|  |       memory: 400Mi | ||||||
|  |   ruleNamespaceSelector: {} | ||||||
|  |   ruleSelector: {} | ||||||
|  |   securityContext: | ||||||
|  |     fsGroup: 2000 | ||||||
|  |     runAsNonRoot: true | ||||||
|  |     runAsUser: 1000 | ||||||
|  |   serviceAccountName: prometheus-k8s | ||||||
|  |   serviceMonitorNamespaceSelector: {} | ||||||
|  |   serviceMonitorSelector: {} | ||||||
|  |   # Storage is rendered from its own value (not from the affinity value) and | ||||||
|  |   # the key is omitted when nothing is configured. | ||||||
|  |   # NOTE(review): assumes values define nfc_monitoring.prometheus.storage - | ||||||
|  |   # confirm the key name against values.yaml. | ||||||
|  |   {{- with .Values.nfc_monitoring.prometheus.storage }} | ||||||
|  |   storage: | ||||||
|  |     {{- toYaml . | nindent 4 }} | ||||||
|  |   {{- end }} | ||||||
|  |   # The Thanos sidecar is only added when enabled in values. | ||||||
|  |   {{- if .Values.nfc_monitoring.thanos.sidecar.enabled }} | ||||||
|  |   thanos: | ||||||
|  |     image: "{{ .Values.nfc_monitoring.thanos.image.name }}:{{ .Values.nfc_monitoring.thanos.image.tag }}" | ||||||
|  |     objectStorageConfig: | ||||||
|  |       key: thanos.yaml | ||||||
|  |       name: thanos-sidecar-config | ||||||
|  |   {{- end }} | ||||||
|  |   version: 2.42.0 | ||||||
|  |   # Extra spec fields supplied through values are appended at spec level. | ||||||
|  |   {{- with .Values.nfc_monitoring.prometheus.additional }} | ||||||
|  |   {{- toYaml . | nindent 2 }} | ||||||
|  |   {{- end }} | ||||||
							
								
								
									
										141
									
								
								templates/PrometheusRule-alertmanager.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										141
									
								
								templates/PrometheusRule-alertmanager.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,141 @@ | |||||||
|  | --- | ||||||
|  | {{- /* Namespace Alertmanager runs in; used by every selector below so the rules follow the configured namespace. */}} | ||||||
|  | {{- $amNamespace := .Values.nfc_monitoring.alert_manager.namespace }} | ||||||
|  | # Alerting rules for the alertmanager-main job. | ||||||
|  | # Alert annotations use escaped braces so that Helm emits the Prometheus | ||||||
|  | # template expressions unchanged. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: PrometheusRule | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: alert-router | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/name: alertmanager | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |     prometheus: k8s | ||||||
|  |     role: alert-rules | ||||||
|  |   name: alertmanager-main-rules | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}" | ||||||
|  | spec: | ||||||
|  |   groups: | ||||||
|  |   - name: alertmanager.rules | ||||||
|  |     rules: | ||||||
|  |     - alert: AlertmanagerFailedReload | ||||||
|  |       annotations: | ||||||
|  |         description: Configuration has failed to load for {{ `{{` }} $labels.namespace }}/{{ `{{` }} $labels.pod}}. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedreload | ||||||
|  |         summary: Reloading an Alertmanager configuration has failed. | ||||||
|  |       expr: | | ||||||
|  |         # Without max_over_time, failed scrapes could create false negatives, see | ||||||
|  |         # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. | ||||||
|  |         max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="{{ $amNamespace }}"}[5m]) == 0 | ||||||
|  |       for: 10m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     - alert: AlertmanagerMembersInconsistent | ||||||
|  |       annotations: | ||||||
|  |         description: Alertmanager {{ `{{` }} $labels.namespace }}/{{ `{{` }} $labels.pod}} has only found {{ `{{` }} $value }} members of the {{ `{{` }}$labels.job}} cluster. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagermembersinconsistent | ||||||
|  |         summary: A member of an Alertmanager cluster has not found all other cluster members. | ||||||
|  |       expr: | | ||||||
|  |         # Without max_over_time, failed scrapes could create false negatives, see | ||||||
|  |         # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. | ||||||
|  |           max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="{{ $amNamespace }}"}[5m]) | ||||||
|  |         < on (namespace,service) group_left | ||||||
|  |           count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="{{ $amNamespace }}"}[5m])) | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     - alert: AlertmanagerFailedToSendAlerts | ||||||
|  |       annotations: | ||||||
|  |         description: Alertmanager {{ `{{` }} $labels.namespace }}/{{ `{{` }} $labels.pod}} failed to send {{ `{{` }} $value | humanizePercentage }} of notifications to {{ `{{` }} $labels.integration }}. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedtosendalerts | ||||||
|  |         summary: An Alertmanager instance failed to send notifications. | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="{{ $amNamespace }}"}[5m]) | ||||||
|  |         / | ||||||
|  |           rate(alertmanager_notifications_total{job="alertmanager-main",namespace="{{ $amNamespace }}"}[5m]) | ||||||
|  |         ) | ||||||
|  |         > 0.01 | ||||||
|  |       for: 5m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # The next two alerts share a name: the first selects integrations matching | ||||||
|  |     # the `.*` pattern (critical), the second those not matching it (warning). | ||||||
|  |     - alert: AlertmanagerClusterFailedToSendAlerts | ||||||
|  |       annotations: | ||||||
|  |         description: The minimum notification failure rate to {{ `{{` }} $labels.integration }} sent from any instance in the {{ `{{` }}$labels.job}} cluster is {{ `{{` }} $value | humanizePercentage }}. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts | ||||||
|  |         summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration. | ||||||
|  |       expr: | | ||||||
|  |         min by (namespace,service, integration) ( | ||||||
|  |           rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="{{ $amNamespace }}", integration=~`.*`}[5m]) | ||||||
|  |         / | ||||||
|  |           rate(alertmanager_notifications_total{job="alertmanager-main",namespace="{{ $amNamespace }}", integration=~`.*`}[5m]) | ||||||
|  |         ) | ||||||
|  |         > 0.01 | ||||||
|  |       for: 5m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     - alert: AlertmanagerClusterFailedToSendAlerts | ||||||
|  |       annotations: | ||||||
|  |         description: The minimum notification failure rate to {{ `{{` }} $labels.integration }} sent from any instance in the {{ `{{` }}$labels.job}} cluster is {{ `{{` }} $value | humanizePercentage }}. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts | ||||||
|  |         summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration. | ||||||
|  |       expr: | | ||||||
|  |         min by (namespace,service, integration) ( | ||||||
|  |           rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="{{ $amNamespace }}", integration!~`.*`}[5m]) | ||||||
|  |         / | ||||||
|  |           rate(alertmanager_notifications_total{job="alertmanager-main",namespace="{{ $amNamespace }}", integration!~`.*`}[5m]) | ||||||
|  |         ) | ||||||
|  |         > 0.01 | ||||||
|  |       for: 5m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     - alert: AlertmanagerConfigInconsistent | ||||||
|  |       annotations: | ||||||
|  |         description: Alertmanager instances within the {{ `{{` }}$labels.job}} cluster have different configurations. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerconfiginconsistent | ||||||
|  |         summary: Alertmanager instances within the same cluster have different configurations. | ||||||
|  |       expr: | | ||||||
|  |         count by (namespace,service) ( | ||||||
|  |           count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="{{ $amNamespace }}"}) | ||||||
|  |         ) | ||||||
|  |         != 1 | ||||||
|  |       for: 20m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     - alert: AlertmanagerClusterDown | ||||||
|  |       annotations: | ||||||
|  |         description: '{{ `{{` }} $value | humanizePercentage }} of Alertmanager instances within the {{ `{{` }}$labels.job}} cluster have been up for less than half of the last 5m.' | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterdown | ||||||
|  |         summary: Half or more of the Alertmanager instances within the same cluster are down. | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           count by (namespace,service) ( | ||||||
|  |             avg_over_time(up{job="alertmanager-main",namespace="{{ $amNamespace }}"}[5m]) < 0.5 | ||||||
|  |           ) | ||||||
|  |         / | ||||||
|  |           count by (namespace,service) ( | ||||||
|  |             up{job="alertmanager-main",namespace="{{ $amNamespace }}"} | ||||||
|  |           ) | ||||||
|  |         ) | ||||||
|  |         >= 0.5 | ||||||
|  |       for: 5m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     - alert: AlertmanagerClusterCrashlooping | ||||||
|  |       annotations: | ||||||
|  |         description: '{{ `{{` }} $value | humanizePercentage }} of Alertmanager instances within the {{ `{{` }}$labels.job}} cluster have restarted at least 5 times in the last 10m.' | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclustercrashlooping | ||||||
|  |         summary: Half or more of the Alertmanager instances within the same cluster are crashlooping. | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           count by (namespace,service) ( | ||||||
|  |             changes(process_start_time_seconds{job="alertmanager-main",namespace="{{ $amNamespace }}"}[10m]) > 4 | ||||||
|  |           ) | ||||||
|  |         / | ||||||
|  |           count by (namespace,service) ( | ||||||
|  |             up{job="alertmanager-main",namespace="{{ $amNamespace }}"} | ||||||
|  |           ) | ||||||
|  |         ) | ||||||
|  |         >= 0.5 | ||||||
|  |       for: 5m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
							
								
								
									
										23
									
								
								templates/PrometheusRule-grafana-agent.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								templates/PrometheusRule-grafana-agent.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,23 @@ | |||||||
|  | --- | ||||||
|  | # Recording rule that republishes the agent_build_info series under the | ||||||
|  | # name promtail_build_info. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: PrometheusRule | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/name: grafana-agent | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |     prometheus: k8s | ||||||
|  |     role: grafana-agent-promtail | ||||||
|  |   name: grafana-agent | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}" | ||||||
|  | spec: | ||||||
|  |   groups: | ||||||
|  |   - name: grafana_agent | ||||||
|  |     rules: | ||||||
|  |     # - annotations: | ||||||
|  |     #     description: "As Grafana Agent is being used, its version is set as promtail's" | ||||||
|  |     - expr: | | ||||||
|  |         agent_build_info | ||||||
|  |       record: promtail_build_info | ||||||
							
								
								
									
										35
									
								
								templates/PrometheusRule-grafana.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								templates/PrometheusRule-grafana.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,35 @@ | |||||||
|  | --- | ||||||
|  | # Grafana rules: one alert on the share of HTTP 5xx responses and the | ||||||
|  | # recording rule that provides the per-handler request rate it is built on. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: PrometheusRule | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: grafana | ||||||
|  |     app.kubernetes.io/name: grafana | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |     prometheus: k8s | ||||||
|  |     role: alert-rules | ||||||
|  |   name: grafana-rules | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}" | ||||||
|  | spec: | ||||||
|  |   groups: | ||||||
|  |   - name: GrafanaAlerts | ||||||
|  |     rules: | ||||||
|  |     # Fires when more than 50% of requests to a handler (datasource proxy and | ||||||
|  |     # query endpoints excluded) returned a 5xx status for 5 minutes. | ||||||
|  |     - alert: GrafanaRequestsFailing | ||||||
|  |       annotations: | ||||||
|  |         message: '{{`{{`}} $labels.namespace }}/{{`{{`}} $labels.job }}/{{`{{`}} $labels.handler }} is experiencing {{`{{`}} $value | humanize }}% errors' | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/grafana/grafanarequestsfailing | ||||||
|  |       expr: | | ||||||
|  |         100 * namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query", status_code=~"5.."} | ||||||
|  |         / ignoring (status_code) | ||||||
|  |         sum without (status_code) (namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query"}) | ||||||
|  |         > 50 | ||||||
|  |       for: 5m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |   - name: grafana_rules | ||||||
|  |     rules: | ||||||
|  |     # Recorded series consumed by the GrafanaRequestsFailing alert above. | ||||||
|  |     - expr: | | ||||||
|  |         sum by (namespace, job, handler, status_code) (rate(grafana_http_request_duration_seconds_count[5m])) | ||||||
|  |       record: namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m | ||||||
							
								
								
									
										86
									
								
								templates/PrometheusRule-kubePrometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								templates/PrometheusRule-kubePrometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,86 @@ | |||||||
|  | --- | ||||||
|  | # General kube-prometheus rules: target availability and pipeline health | ||||||
|  | # alerts, a node network alert, and node CPU/network and "up" recording rules. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: PrometheusRule | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/name: kube-prometheus | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |     prometheus: k8s | ||||||
|  |     role: alert-rules | ||||||
|  |   name: kube-prometheus-rules | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}" | ||||||
|  | spec: | ||||||
|  |   groups: | ||||||
|  |   - name: general.rules | ||||||
|  |     rules: | ||||||
|  |     - alert: TargetDown | ||||||
|  |       annotations: | ||||||
|  |         description: '{{ `{{` }} printf "%.4g" $value }}% of the {{ `{{` }} $labels.job }}/{{ `{{` }} $labels.service }} targets in {{ `{{` }} $labels.namespace }} namespace are down.' | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown | ||||||
|  |         summary: One or more targets are unreachable. | ||||||
|  |       expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10 | ||||||
|  |       for: 10m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # vector(1) always returns a sample, so this alert is permanently firing. | ||||||
|  |     - alert: Watchdog | ||||||
|  |       annotations: | ||||||
|  |         description: | | ||||||
|  |           This is an alert meant to ensure that the entire alerting pipeline is functional. | ||||||
|  |           This alert is always firing, therefore it should always be firing in Alertmanager | ||||||
|  |           and always fire against a receiver. There are integrations with various notification | ||||||
|  |           mechanisms that send a notification when this alert is not firing. For example the | ||||||
|  |           "DeadMansSnitch" integration in PagerDuty. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog | ||||||
|  |         summary: An alert that should always be firing to certify that Alertmanager is working properly. | ||||||
|  |       expr: vector(1) | ||||||
|  |       labels: | ||||||
|  |         severity: none | ||||||
|  |     - alert: InfoInhibitor | ||||||
|  |       annotations: | ||||||
|  |         description: | | ||||||
|  |           This is an alert that is used to inhibit info alerts. | ||||||
|  |           By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with | ||||||
|  |           other alerts. | ||||||
|  |           This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a | ||||||
|  |           severity of 'warning' or 'critical' starts firing on the same namespace. | ||||||
|  |           This alert should be routed to a null receiver and configured to inhibit alerts with severity="info". | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor | ||||||
|  |         summary: Info-level alert inhibition. | ||||||
|  |       expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1 | ||||||
|  |       labels: | ||||||
|  |         severity: none | ||||||
|  |   - name: node-network | ||||||
|  |     rules: | ||||||
|  |     # Interfaces whose device name matches veth.+ are excluded. | ||||||
|  |     - alert: NodeNetworkInterfaceFlapping | ||||||
|  |       annotations: | ||||||
|  |         description: Network interface "{{ `{{` }} $labels.device }}" changing its up status often on node-exporter {{ `{{` }} $labels.namespace }}/{{ `{{` }} $labels.pod }} | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping | ||||||
|  |         summary: Network interface is often changing its status | ||||||
|  |       expr: | | ||||||
|  |         changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 | ||||||
|  |       for: 2m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |   # Per-instance and cluster-wide CPU / network recording rules. | ||||||
|  |   - name: kube-prometheus-node-recording.rules | ||||||
|  |     rules: | ||||||
|  |     - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance) | ||||||
|  |       record: instance:node_cpu:rate:sum | ||||||
|  |     - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) | ||||||
|  |       record: instance:node_network_receive_bytes:rate:sum | ||||||
|  |     - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) | ||||||
|  |       record: instance:node_network_transmit_bytes:rate:sum | ||||||
|  |     - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance) | ||||||
|  |       record: instance:node_cpu:ratio | ||||||
|  |     - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) | ||||||
|  |       record: cluster:node_cpu:sum_rate5m | ||||||
|  |     - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) | ||||||
|  |       record: cluster:node_cpu:ratio | ||||||
|  |   # Counts of targets that are up (1) and down (0), aggregated without the | ||||||
|  |   # instance, pod and node labels. | ||||||
|  |   - name: kube-prometheus-general.rules | ||||||
|  |     rules: | ||||||
|  |     - expr: count without(instance, pod, node) (up == 1) | ||||||
|  |       record: count:up1 | ||||||
|  |     - expr: count without(instance, pod, node) (up == 0) | ||||||
|  |       record: count:up0 | ||||||
							
								
								
									
										67
									
								
								templates/PrometheusRule-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								templates/PrometheusRule-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,67 @@ | |||||||
|  | --- | ||||||
|  | # Alerting rules for the kube-state-metrics job: list/watch error ratios and | ||||||
|  | # shard configuration consistency. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: PrometheusRule | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/name: kube-state-metrics | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |     prometheus: k8s | ||||||
|  |     role: alert-rules | ||||||
|  |   name: kube-state-metrics-rules | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.prometheus.namespace }}" | ||||||
|  | spec: | ||||||
|  |   groups: | ||||||
|  |   - name: kube-state-metrics | ||||||
|  |     rules: | ||||||
|  |     # Error share of list operations above 1% for 15 minutes. | ||||||
|  |     - alert: KubeStateMetricsListErrors | ||||||
|  |       annotations: | ||||||
|  |         description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors | ||||||
|  |         summary: kube-state-metrics is experiencing errors in list operations. | ||||||
|  |       expr: | | ||||||
|  |         (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) | ||||||
|  |           / | ||||||
|  |         sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m]))) | ||||||
|  |         > 0.01 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     # Error share of watch operations above 1% for 15 minutes. | ||||||
|  |     - alert: KubeStateMetricsWatchErrors | ||||||
|  |       annotations: | ||||||
|  |         description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors | ||||||
|  |         summary: kube-state-metrics is experiencing errors in watch operations. | ||||||
|  |       expr: | | ||||||
|  |         (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) | ||||||
|  |           / | ||||||
|  |         sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m]))) | ||||||
|  |         > 0.01 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     # Non-zero variance means the reported total shard counts differ. | ||||||
|  |     - alert: KubeStateMetricsShardingMismatch | ||||||
|  |       annotations: | ||||||
|  |         description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch | ||||||
|  |         summary: kube-state-metrics sharding is misconfigured. | ||||||
|  |       expr: | | ||||||
|  |         stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) != 0 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     # Compares 2^total_shards - 1 with the sum of 2^ordinal over the shard | ||||||
|  |     # ordinals that are reporting; any difference raises the alert. | ||||||
|  |     - alert: KubeStateMetricsShardsMissing | ||||||
|  |       annotations: | ||||||
|  |         description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing | ||||||
|  |         summary: kube-state-metrics shards are missing. | ||||||
|  |       expr: | | ||||||
|  |         2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) - 1 | ||||||
|  |           - | ||||||
|  |         sum( 2 ^ max by (shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) | ||||||
|  |         != 0 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
							
								
								
									
										1441
									
								
								templates/PrometheusRule-kubernetesControlPlane.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1441
									
								
								templates/PrometheusRule-kubernetesControlPlane.yaml
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										112
									
								
								templates/PrometheusRule-loki.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										112
									
								
								templates/PrometheusRule-loki.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,112 @@ | |||||||
|  | --- | ||||||
|  | {{- if .Values.nfc_monitoring.loki.enabled | default false -}} | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: PrometheusRule | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: logging | ||||||
|  |     app.kubernetes.io/name: loki | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     prometheus: k8s | ||||||
|  |     role: alert-rules | ||||||
|  |   name: loki | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.prometheus.namespace }} | ||||||
|  | spec: | ||||||
|  |   groups: | ||||||
|  |   # Recording rules that pre-aggregate the loki_request_duration_seconds | ||||||
|  |   # histogram over a 1m rate window. Three label sets are produced | ||||||
|  |   # (cluster/job, plus route, plus namespace); each gets p99, p50, average | ||||||
|  |   # and the raw bucket/sum/count rates. | ||||||
|  |   - name: loki_rules | ||||||
|  |     rules: | ||||||
|  |     # --- aggregated by (cluster, job) --- | ||||||
|  |     - record: cluster_job:loki_request_duration_seconds:99quantile | ||||||
|  |       expr: >- | ||||||
|  |         histogram_quantile(0.99, | ||||||
|  |         sum(rate(loki_request_duration_seconds_bucket[1m])) | ||||||
|  |         by (le, cluster, job)) | ||||||
|  |     - record: cluster_job:loki_request_duration_seconds:50quantile | ||||||
|  |       expr: >- | ||||||
|  |         histogram_quantile(0.50, | ||||||
|  |         sum(rate(loki_request_duration_seconds_bucket[1m])) | ||||||
|  |         by (le, cluster, job)) | ||||||
|  |     - record: cluster_job:loki_request_duration_seconds:avg | ||||||
|  |       expr: >- | ||||||
|  |         sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) | ||||||
|  |         / | ||||||
|  |         sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job) | ||||||
|  |     - record: cluster_job:loki_request_duration_seconds_bucket:sum_rate | ||||||
|  |       expr: >- | ||||||
|  |         sum(rate(loki_request_duration_seconds_bucket[1m])) | ||||||
|  |         by (le, cluster, job) | ||||||
|  |     - record: cluster_job:loki_request_duration_seconds_sum:sum_rate | ||||||
|  |       expr: >- | ||||||
|  |         sum(rate(loki_request_duration_seconds_sum[1m])) | ||||||
|  |         by (cluster, job) | ||||||
|  |     - record: cluster_job:loki_request_duration_seconds_count:sum_rate | ||||||
|  |       expr: >- | ||||||
|  |         sum(rate(loki_request_duration_seconds_count[1m])) | ||||||
|  |         by (cluster, job) | ||||||
|  |     # --- aggregated by (cluster, job, route) --- | ||||||
|  |     - record: cluster_job_route:loki_request_duration_seconds:99quantile | ||||||
|  |       expr: >- | ||||||
|  |         histogram_quantile(0.99, | ||||||
|  |         sum(rate(loki_request_duration_seconds_bucket[1m])) | ||||||
|  |         by (le, cluster, job, route)) | ||||||
|  |     - record: cluster_job_route:loki_request_duration_seconds:50quantile | ||||||
|  |       expr: >- | ||||||
|  |         histogram_quantile(0.50, | ||||||
|  |         sum(rate(loki_request_duration_seconds_bucket[1m])) | ||||||
|  |         by (le, cluster, job, route)) | ||||||
|  |     - record: cluster_job_route:loki_request_duration_seconds:avg | ||||||
|  |       expr: >- | ||||||
|  |         sum(rate(loki_request_duration_seconds_sum[1m])) | ||||||
|  |         by (cluster, job, route) | ||||||
|  |         / | ||||||
|  |         sum(rate(loki_request_duration_seconds_count[1m])) | ||||||
|  |         by (cluster, job, route) | ||||||
|  |     - record: cluster_job_route:loki_request_duration_seconds_bucket:sum_rate | ||||||
|  |       expr: >- | ||||||
|  |         sum(rate(loki_request_duration_seconds_bucket[1m])) | ||||||
|  |         by (le, cluster, job, route) | ||||||
|  |     - record: cluster_job_route:loki_request_duration_seconds_sum:sum_rate | ||||||
|  |       expr: >- | ||||||
|  |         sum(rate(loki_request_duration_seconds_sum[1m])) | ||||||
|  |         by (cluster, job, route) | ||||||
|  |     - record: cluster_job_route:loki_request_duration_seconds_count:sum_rate | ||||||
|  |       expr: >- | ||||||
|  |         sum(rate(loki_request_duration_seconds_count[1m])) | ||||||
|  |         by (cluster, job, route) | ||||||
|  |     # --- aggregated by (cluster, namespace, job, route) --- | ||||||
|  |     - record: cluster_namespace_job_route:loki_request_duration_seconds:99quantile | ||||||
|  |       expr: >- | ||||||
|  |         histogram_quantile(0.99, | ||||||
|  |         sum(rate(loki_request_duration_seconds_bucket[1m])) | ||||||
|  |         by (le, cluster, namespace, job, route)) | ||||||
|  |     - record: cluster_namespace_job_route:loki_request_duration_seconds:50quantile | ||||||
|  |       expr: >- | ||||||
|  |         histogram_quantile(0.50, | ||||||
|  |         sum(rate(loki_request_duration_seconds_bucket[1m])) | ||||||
|  |         by (le, cluster, namespace, job, route)) | ||||||
|  |     - record: cluster_namespace_job_route:loki_request_duration_seconds:avg | ||||||
|  |       expr: >- | ||||||
|  |         sum(rate(loki_request_duration_seconds_sum[1m])) | ||||||
|  |         by (cluster, namespace, job, route) | ||||||
|  |         / | ||||||
|  |         sum(rate(loki_request_duration_seconds_count[1m])) | ||||||
|  |         by (cluster, namespace, job, route) | ||||||
|  |     - record: cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate | ||||||
|  |       expr: >- | ||||||
|  |         sum(rate(loki_request_duration_seconds_bucket[1m])) | ||||||
|  |         by (le, cluster, namespace, job, route) | ||||||
|  |     - record: cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate | ||||||
|  |       expr: >- | ||||||
|  |         sum(rate(loki_request_duration_seconds_sum[1m])) | ||||||
|  |         by (cluster, namespace, job, route) | ||||||
|  |     - record: cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate | ||||||
|  |       expr: >- | ||||||
|  |         sum(rate(loki_request_duration_seconds_count[1m])) | ||||||
|  |         by (cluster, namespace, job, route) | ||||||
|  |  | ||||||
|  |   # Alerting rules covering Loki request errors, panics, latency and | ||||||
|  |   # compactor concurrency. | ||||||
|  |   - name: loki_alerts | ||||||
|  |     rules: | ||||||
|  |     # More than 10% of requests per (namespace, job, route) returned a 5xx | ||||||
|  |     # status code over a 2m rate window, sustained for 15m. | ||||||
|  |     - alert: LokiRequestErrors | ||||||
|  |       expr: | | ||||||
|  |         100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route) | ||||||
|  |           / | ||||||
|  |         sum(rate(loki_request_duration_seconds_count[2m])) by (namespace, job, route) | ||||||
|  |           > 10 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |       annotations: | ||||||
|  |         message: | | ||||||
|  |           {{ `{{` }} $labels.job }} {{ `{{` }} $labels.route }} is experiencing {{ `{{` }} printf "%.2f" $value }}% errors. | ||||||
|  |     # Any increase of loki_panic_total within 10m per (namespace, job). | ||||||
|  |     # There is no "for" clause, so this fires on the first evaluation. | ||||||
|  |     # NOTE(review): the value is the increase of a counter, not a ratio, yet | ||||||
|  |     # the message renders it followed by a percent sign - confirm wording. | ||||||
|  |     - alert: LokiRequestPanics | ||||||
|  |       expr: | | ||||||
|  |         sum(increase(loki_panic_total[10m])) by (namespace, job) > 0 | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |       annotations: | ||||||
|  |         message: | | ||||||
|  |           {{ `{{` }} $labels.job }} is experiencing {{ `{{` }} printf "%.2f" $value }}% increase of panics. | ||||||
|  |     # Recorded p99 latency above 1 for 15m. Routes matching "tail" | ||||||
|  |     # (case-insensitive) and the scheduler QuerierLoop route are excluded. | ||||||
|  |     - alert: LokiRequestLatency | ||||||
|  |       expr: | | ||||||
|  |         cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |       annotations: | ||||||
|  |         message: | | ||||||
|  |           {{ `{{` }} $labels.job }} {{ `{{` }} $labels.route }} is experiencing {{ `{{` }} printf "%.2f" $value }}s 99th percentile latency. | ||||||
|  |     # Sum of loki_boltdb_shipper_compactor_running per (namespace, cluster) | ||||||
|  |     # stays above 1 for 5m. | ||||||
|  |     - alert: LokiTooManyCompactorsRunning | ||||||
|  |       expr: | | ||||||
|  |         sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1 | ||||||
|  |       for: 5m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |       annotations: | ||||||
|  |         message: | | ||||||
|  |           {{ `{{` }} $labels.cluster }} {{ `{{` }} $labels.namespace }} has had {{ `{{` }} printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time. | ||||||
|  |  | ||||||
|  | {{- end -}} | ||||||
							
								
								
									
										318
									
								
								templates/PrometheusRule-nodeExporter.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										318
									
								
								templates/PrometheusRule-nodeExporter.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,318 @@ | |||||||
|  | --- | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: PrometheusRule | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/name: node-exporter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     prometheus: k8s | ||||||
|  |     role: alert-rules | ||||||
|  |   name: node-exporter-rules | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.prometheus.namespace }} | ||||||
|  | spec: | ||||||
|  |   groups: | ||||||
|  |   - name: node-exporter | ||||||
|  |     rules: | ||||||
|  |     # Warning tier: below 15% available space, a 6h linear trend that reaches | ||||||
|  |     # zero within 24h, and the filesystem is not read-only. | ||||||
|  |     - alert: NodeFilesystemSpaceFillingUp | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15 | ||||||
|  |         and | ||||||
|  |           predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 | ||||||
|  |         and | ||||||
|  |           node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 | ||||||
|  |         ) | ||||||
|  |       for: 1h | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |       annotations: | ||||||
|  |         summary: Filesystem is predicted to run out of space within the next 24 hours. | ||||||
|  |         description: Filesystem on {{ `{{` }} $labels.device }} at {{ `{{` }} $labels.instance }} has only {{ `{{` }} printf "%.2f" $value }}% available space left and is filling up. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup | ||||||
|  |     # Critical tier: below 10% available space and a trend that reaches zero | ||||||
|  |     # within 4h. | ||||||
|  |     - alert: NodeFilesystemSpaceFillingUp | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10 | ||||||
|  |         and | ||||||
|  |           predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 | ||||||
|  |         and | ||||||
|  |           node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 | ||||||
|  |         ) | ||||||
|  |       for: 1h | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |       annotations: | ||||||
|  |         summary: Filesystem is predicted to run out of space within the next 4 hours. | ||||||
|  |         description: Filesystem on {{ `{{` }} $labels.device }} at {{ `{{` }} $labels.instance }} has only {{ `{{` }} printf "%.2f" $value }}% available space left and is filling up fast. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup | ||||||
|  |     # Warning tier: below 5% available space on a writable filesystem for 30m. | ||||||
|  |     - alert: NodeFilesystemAlmostOutOfSpace | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 | ||||||
|  |         and | ||||||
|  |           node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 | ||||||
|  |         ) | ||||||
|  |       for: 30m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |       annotations: | ||||||
|  |         summary: Filesystem has less than 5% space left. | ||||||
|  |         description: Filesystem on {{ `{{` }} $labels.device }} at {{ `{{` }} $labels.instance }} has only {{ `{{` }} printf "%.2f" $value }}% available space left. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace | ||||||
|  |     # Critical tier: below 3% available space on a writable filesystem for 30m. | ||||||
|  |     - alert: NodeFilesystemAlmostOutOfSpace | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 | ||||||
|  |         and | ||||||
|  |           node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 | ||||||
|  |         ) | ||||||
|  |       for: 30m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |       annotations: | ||||||
|  |         summary: Filesystem has less than 3% space left. | ||||||
|  |         description: Filesystem on {{ `{{` }} $labels.device }} at {{ `{{` }} $labels.instance }} has only {{ `{{` }} printf "%.2f" $value }}% available space left. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace | ||||||
|  |     # Warning tier: below 40% free inodes, a 6h linear trend that reaches zero | ||||||
|  |     # within 24h, and the filesystem is not read-only. | ||||||
|  |     - alert: NodeFilesystemFilesFillingUp | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40 | ||||||
|  |         and | ||||||
|  |           predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 | ||||||
|  |         and | ||||||
|  |           node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 | ||||||
|  |         ) | ||||||
|  |       for: 1h | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |       annotations: | ||||||
|  |         summary: Filesystem is predicted to run out of inodes within the next 24 hours. | ||||||
|  |         description: Filesystem on {{ `{{` }} $labels.device }} at {{ `{{` }} $labels.instance }} has only {{ `{{` }} printf "%.2f" $value }}% available inodes left and is filling up. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup | ||||||
|  |     # Critical tier: below 20% free inodes and a trend that reaches zero | ||||||
|  |     # within 4h. | ||||||
|  |     - alert: NodeFilesystemFilesFillingUp | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20 | ||||||
|  |         and | ||||||
|  |           predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 | ||||||
|  |         and | ||||||
|  |           node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 | ||||||
|  |         ) | ||||||
|  |       for: 1h | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |       annotations: | ||||||
|  |         summary: Filesystem is predicted to run out of inodes within the next 4 hours. | ||||||
|  |         description: Filesystem on {{ `{{` }} $labels.device }} at {{ `{{` }} $labels.instance }} has only {{ `{{` }} printf "%.2f" $value }}% available inodes left and is filling up fast. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup | ||||||
|  |     # Warning tier: below 5% free inodes on a writable filesystem for 1h. | ||||||
|  |     - alert: NodeFilesystemAlmostOutOfFiles | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 | ||||||
|  |         and | ||||||
|  |           node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 | ||||||
|  |         ) | ||||||
|  |       for: 1h | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |       annotations: | ||||||
|  |         summary: Filesystem has less than 5% inodes left. | ||||||
|  |         description: Filesystem on {{ `{{` }} $labels.device }} at {{ `{{` }} $labels.instance }} has only {{ `{{` }} printf "%.2f" $value }}% available inodes left. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles | ||||||
|  |     # Critical tier: below 3% free inodes on a writable filesystem for 1h. | ||||||
|  |     - alert: NodeFilesystemAlmostOutOfFiles | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 | ||||||
|  |         and | ||||||
|  |           node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 | ||||||
|  |         ) | ||||||
|  |       for: 1h | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |       annotations: | ||||||
|  |         summary: Filesystem has less than 3% inodes left. | ||||||
|  |         description: Filesystem on {{ `{{` }} $labels.device }} at {{ `{{` }} $labels.instance }} has only {{ `{{` }} printf "%.2f" $value }}% available inodes left. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles | ||||||
|  |     # Fires when more than 1% of received packets on an interface are errors | ||||||
|  |     # (2m rate window), sustained for 1h. The alert value is that error | ||||||
|  |     # ratio, so the description renders it as a percentage rather than as | ||||||
|  |     # an absolute error count. | ||||||
|  |     - alert: NodeNetworkReceiveErrs | ||||||
|  |       annotations: | ||||||
|  |         description: '{{ `{{` }} $labels.instance }} interface {{ `{{` }} $labels.device }} has seen {{ `{{` }} $value | humanizePercentage }} of received packets reported as errors over the last two minutes.' | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs | ||||||
|  |         summary: Network interface is reporting many receive errors. | ||||||
|  |       expr: | | ||||||
|  |         rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01 | ||||||
|  |       for: 1h | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Fires when more than 1% of transmitted packets on an interface are | ||||||
|  |     # errors (2m rate window), sustained for 1h. The alert value is that | ||||||
|  |     # error ratio, so the description renders it as a percentage rather than | ||||||
|  |     # as an absolute error count. | ||||||
|  |     - alert: NodeNetworkTransmitErrs | ||||||
|  |       annotations: | ||||||
|  |         description: '{{ `{{` }} $labels.instance }} interface {{ `{{` }} $labels.device }} has seen {{ `{{` }} $value | humanizePercentage }} of transmitted packets reported as errors over the last two minutes.' | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs | ||||||
|  |         summary: Network interface is reporting many transmit errors. | ||||||
|  |       expr: | | ||||||
|  |         rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01 | ||||||
|  |       for: 1h | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Conntrack table usage above 75% of its limit. There is no "for" | ||||||
|  |     # clause, so this fires on the first matching evaluation. | ||||||
|  |     - alert: NodeHighNumberConntrackEntriesUsed | ||||||
|  |       expr: | | ||||||
|  |         (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75 | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |       annotations: | ||||||
|  |         summary: Number of conntrack are getting close to the limit. | ||||||
|  |         description: '{{ `{{` }} $value | humanizePercentage }} of conntrack entries are used.' | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused | ||||||
|  |     # node_textfile_scrape_error equals 1 for the node-exporter job. There | ||||||
|  |     # is no "for" clause, so this fires on the first matching evaluation. | ||||||
|  |     - alert: NodeTextFileCollectorScrapeError | ||||||
|  |       expr: | | ||||||
|  |         node_textfile_scrape_error{job="node-exporter"} == 1 | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |       annotations: | ||||||
|  |         summary: Node Exporter text file collector failed to scrape. | ||||||
|  |         description: Node Exporter text file collector failed to scrape. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror | ||||||
|  |     # Fires when the clock offset exceeds 0.05s in either direction and is | ||||||
|  |     # not shrinking (5m derivative has the same sign as the offset), | ||||||
|  |     # sustained for 10m. The description states the same 0.05s threshold | ||||||
|  |     # that the expression checks. | ||||||
|  |     - alert: NodeClockSkewDetected | ||||||
|  |       annotations: | ||||||
|  |         description: Clock on {{ `{{` }} $labels.instance }} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected | ||||||
|  |         summary: Clock skew detected. | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           node_timex_offset_seconds{job="node-exporter"} > 0.05 | ||||||
|  |         and | ||||||
|  |           deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0 | ||||||
|  |         ) | ||||||
|  |         or | ||||||
|  |         ( | ||||||
|  |           node_timex_offset_seconds{job="node-exporter"} < -0.05 | ||||||
|  |         and | ||||||
|  |           deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0 | ||||||
|  |         ) | ||||||
|  |       for: 10m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Sync status was 0 throughout the last 5m and the reported maximum | ||||||
|  |     # error is at least 16 seconds, sustained for 10m. | ||||||
|  |     - alert: NodeClockNotSynchronising | ||||||
|  |       expr: | | ||||||
|  |         min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0 | ||||||
|  |         and | ||||||
|  |         node_timex_maxerror_seconds{job="node-exporter"} >= 16 | ||||||
|  |       for: 10m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |       annotations: | ||||||
|  |         summary: Clock not synchronising. | ||||||
|  |         description: Clock on {{ `{{` }} $labels.instance }} is not synchronising. Ensure NTP is configured on this host. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising | ||||||
|  |     # Required disks minus active disks is above zero for a matching md | ||||||
|  |     # device (the state label is ignored when matching), sustained for 15m. | ||||||
|  |     - alert: NodeRAIDDegraded | ||||||
|  |       expr: | | ||||||
|  |         node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |       annotations: | ||||||
|  |         summary: RAID Array is degraded | ||||||
|  |         description: RAID array '{{ `{{` }} $labels.device }}' on {{ `{{` }} $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded | ||||||
|  |     # At least one disk of a matching md device is in the failed state. | ||||||
|  |     # There is no "for" clause, so this fires on the first evaluation. | ||||||
|  |     - alert: NodeRAIDDiskFailure | ||||||
|  |       expr: | | ||||||
|  |         node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0 | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |       annotations: | ||||||
|  |         summary: Failed device in RAID array | ||||||
|  |         description: At least one device in RAID array on {{ `{{` }} $labels.instance }} failed. Array '{{ `{{` }} $labels.device }}' needs attention and possibly a disk swap. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure | ||||||
|  |     # Warning tier: allocated file descriptors above 70% of the maximum | ||||||
|  |     # for 15m. | ||||||
|  |     - alert: NodeFileDescriptorLimit | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70 | ||||||
|  |         ) | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |       annotations: | ||||||
|  |         summary: Kernel is predicted to exhaust file descriptors limit soon. | ||||||
|  |         description: File descriptors limit at {{ `{{` }} $labels.instance }} is currently at {{ `{{` }} printf "%.2f" $value }}%. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit | ||||||
|  |     # Critical tier: allocated file descriptors above 90% of the maximum | ||||||
|  |     # for 15m. | ||||||
|  |     - alert: NodeFileDescriptorLimit | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90 | ||||||
|  |         ) | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |       annotations: | ||||||
|  |         summary: Kernel is predicted to exhaust file descriptors limit soon. | ||||||
|  |         description: File descriptors limit at {{ `{{` }} $labels.instance }} is currently at {{ `{{` }} printf "%.2f" $value }}%. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit | ||||||
|  |   # Recording rules derived from node-exporter metrics: per-instance CPU, | ||||||
|  |   # load, memory, paging, disk and network series. | ||||||
|  |   - name: node-exporter.rules | ||||||
|  |     rules: | ||||||
|  |     # CPU count: number of idle-mode node_cpu_seconds_total series once the | ||||||
|  |     # cpu and mode labels are dropped. | ||||||
|  |     - record: instance:node_num_cpu:sum | ||||||
|  |       expr: | | ||||||
|  |         count without (cpu, mode) ( | ||||||
|  |           node_cpu_seconds_total{job="node-exporter",mode="idle"} | ||||||
|  |         ) | ||||||
|  |     # CPU utilisation: 1 minus the per-CPU average of the 5m rate of idle, | ||||||
|  |     # iowait and steal time. | ||||||
|  |     - record: instance:node_cpu_utilisation:rate5m | ||||||
|  |       expr: | | ||||||
|  |         1 - avg without (cpu) ( | ||||||
|  |           sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m])) | ||||||
|  |         ) | ||||||
|  |     # 1-minute load average divided by the recorded CPU count. | ||||||
|  |     - record: instance:node_load1_per_cpu:ratio | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           node_load1{job="node-exporter"} | ||||||
|  |         / | ||||||
|  |           instance:node_num_cpu:sum{job="node-exporter"} | ||||||
|  |         ) | ||||||
|  |     # Memory utilisation: 1 minus available over total. When MemAvailable | ||||||
|  |     # has no series, Buffers + Cached + MemFree + Slab is used instead. | ||||||
|  |     - record: instance:node_memory_utilisation:ratio | ||||||
|  |       expr: | | ||||||
|  |         1 - ( | ||||||
|  |           ( | ||||||
|  |             node_memory_MemAvailable_bytes{job="node-exporter"} | ||||||
|  |             or | ||||||
|  |             ( | ||||||
|  |               node_memory_Buffers_bytes{job="node-exporter"} | ||||||
|  |               + | ||||||
|  |               node_memory_Cached_bytes{job="node-exporter"} | ||||||
|  |               + | ||||||
|  |               node_memory_MemFree_bytes{job="node-exporter"} | ||||||
|  |               + | ||||||
|  |               node_memory_Slab_bytes{job="node-exporter"} | ||||||
|  |             ) | ||||||
|  |           ) | ||||||
|  |         / | ||||||
|  |           node_memory_MemTotal_bytes{job="node-exporter"} | ||||||
|  |         ) | ||||||
|  |     # 5m rate of major page faults. | ||||||
|  |     - record: instance:node_vmstat_pgmajfault:rate5m | ||||||
|  |       expr: | | ||||||
|  |         rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) | ||||||
|  |     # 5m rate of disk I/O time for devices matching the device regex. | ||||||
|  |     - record: instance_device:node_disk_io_time_seconds:rate5m | ||||||
|  |       expr: | | ||||||
|  |         rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) | ||||||
|  |     # 5m rate of weighted disk I/O time for the same device set. | ||||||
|  |     - record: instance_device:node_disk_io_time_weighted_seconds:rate5m | ||||||
|  |       expr: | | ||||||
|  |         rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) | ||||||
|  |     # Network series below sum over all devices except the loopback "lo". | ||||||
|  |     - record: instance:node_network_receive_bytes_excluding_lo:rate5m | ||||||
|  |       expr: | | ||||||
|  |         sum without (device) ( | ||||||
|  |           rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m]) | ||||||
|  |         ) | ||||||
|  |     - record: instance:node_network_transmit_bytes_excluding_lo:rate5m | ||||||
|  |       expr: | | ||||||
|  |         sum without (device) ( | ||||||
|  |           rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m]) | ||||||
|  |         ) | ||||||
|  |     - record: instance:node_network_receive_drop_excluding_lo:rate5m | ||||||
|  |       expr: | | ||||||
|  |         sum without (device) ( | ||||||
|  |           rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m]) | ||||||
|  |         ) | ||||||
|  |     - record: instance:node_network_transmit_drop_excluding_lo:rate5m | ||||||
|  |       expr: | | ||||||
|  |         sum without (device) ( | ||||||
|  |           rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m]) | ||||||
|  |         ) | ||||||
							
								
								
									
										281
									
								
								templates/PrometheusRule-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										281
									
								
								templates/PrometheusRule-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,281 @@ | |||||||
|  | --- | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: PrometheusRule | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     prometheus: k8s | ||||||
|  |     role: alert-rules | ||||||
|  |   name: prometheus-k8s-prometheus-rules | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.prometheus.namespace }} | ||||||
|  | spec: | ||||||
|  |   groups: | ||||||
|  |   - name: prometheus | ||||||
|  |     rules: | ||||||
|  |     # Fires when the last configuration reload was unsuccessful for the | ||||||
|  |     # whole 5m window, sustained for 10m. | ||||||
|  |     # The namespace selector is rendered from the chart value that also sets | ||||||
|  |     # this rule's namespace, instead of a fixed "monitoring" string. | ||||||
|  |     # NOTE(review): assumes Prometheus itself runs in that namespace - confirm. | ||||||
|  |     - alert: PrometheusBadConfig | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} has failed to reload its configuration. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusbadconfig | ||||||
|  |         summary: Failed Prometheus configuration reload. | ||||||
|  |       expr: | | ||||||
|  |         # Without max_over_time, failed scrapes could create false negatives, see | ||||||
|  |         # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. | ||||||
|  |         max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="{{ $.Values.nfc_monitoring.prometheus.namespace }}"}[5m]) == 0 | ||||||
|  |       for: 10m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     # Fires when a 5m linear prediction of the notification queue length, | ||||||
|  |     # 30 minutes ahead, exceeds the queue capacity, sustained for 15m. | ||||||
|  |     # The namespace selector is rendered from the chart value that also sets | ||||||
|  |     # this rule's namespace, instead of a fixed "monitoring" string. | ||||||
|  |     # NOTE(review): assumes Prometheus itself runs in that namespace - confirm. | ||||||
|  |     - alert: PrometheusNotificationQueueRunningFull | ||||||
|  |       annotations: | ||||||
|  |         description: Alert notification queue of Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} is running full. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotificationqueuerunningfull | ||||||
|  |         summary: Prometheus alert notification queue predicted to run full in less than 30m. | ||||||
|  |       expr: | | ||||||
|  |         # Without min_over_time, failed scrapes could create false negatives, see | ||||||
|  |         # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. | ||||||
|  |         ( | ||||||
|  |           predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="{{ $.Values.nfc_monitoring.prometheus.namespace }}"}[5m], 60 * 30) | ||||||
|  |         > | ||||||
|  |           min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="{{ $.Values.nfc_monitoring.prometheus.namespace }}"}[5m]) | ||||||
|  |         ) | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Fires when more than 1% of notifications sent to an Alertmanager | ||||||
|  |     # fail (5m rate window), sustained for 15m. | ||||||
|  |     # The namespace selector is rendered from the chart value that also sets | ||||||
|  |     # this rule's namespace, instead of a fixed "monitoring" string. | ||||||
|  |     # NOTE(review): assumes Prometheus itself runs in that namespace - confirm. | ||||||
|  |     - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers | ||||||
|  |       annotations: | ||||||
|  |         description: '{{ `{{` }} printf "%.1f" $value }}% errors while sending alerts from Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} to Alertmanager {{ `{{` }}$labels.alertmanager}}.' | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers | ||||||
|  |         summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager. | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="{{ $.Values.nfc_monitoring.prometheus.namespace }}"}[5m]) | ||||||
|  |         / | ||||||
|  |           rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="{{ $.Values.nfc_monitoring.prometheus.namespace }}"}[5m]) | ||||||
|  |         ) | ||||||
|  |         * 100 | ||||||
|  |         > 1 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Fires when fewer than one Alertmanager was discovered throughout the | ||||||
|  |     # last 5m, sustained for 10m. | ||||||
|  |     # The namespace selector is rendered from the chart value that also sets | ||||||
|  |     # this rule's namespace, instead of a fixed "monitoring" string. | ||||||
|  |     # NOTE(review): assumes Prometheus itself runs in that namespace - confirm. | ||||||
|  |     - alert: PrometheusNotConnectedToAlertmanagers | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} is not connected to any Alertmanagers. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotconnectedtoalertmanagers | ||||||
|  |         summary: Prometheus is not connected to any Alertmanagers. | ||||||
|  |       expr: | | ||||||
|  |         # Without max_over_time, failed scrapes could create false negatives, see | ||||||
|  |         # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. | ||||||
|  |         max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="{{ $.Values.nfc_monitoring.prometheus.namespace }}"}[5m]) < 1 | ||||||
|  |       for: 10m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Fires when the TSDB reload failure counter increased within the last | ||||||
|  |     # 3h, sustained for 4h. | ||||||
|  |     # The namespace selector is rendered from the chart value that also sets | ||||||
|  |     # this rule's namespace, instead of a fixed "monitoring" string. | ||||||
|  |     # NOTE(review): assumes Prometheus itself runs in that namespace - confirm. | ||||||
|  |     - alert: PrometheusTSDBReloadsFailing | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} has detected {{ `{{` }}$value | humanize}} reload failures over the last 3h. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing | ||||||
|  |         summary: Prometheus has issues reloading blocks from disk. | ||||||
|  |       expr: | | ||||||
|  |         increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="{{ $.Values.nfc_monitoring.prometheus.namespace }}"}[3h]) > 0 | ||||||
|  |       for: 4h | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Fires when the TSDB compaction failure counter increased within the | ||||||
|  |     # last 3h, sustained for 4h. | ||||||
|  |     # The namespace selector is rendered from the chart value that also sets | ||||||
|  |     # this rule's namespace, instead of a fixed "monitoring" string. | ||||||
|  |     # NOTE(review): assumes Prometheus itself runs in that namespace - confirm. | ||||||
|  |     - alert: PrometheusTSDBCompactionsFailing | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} has detected {{ `{{` }}$value | humanize}} compaction failures over the last 3h. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing | ||||||
|  |         summary: Prometheus has issues compacting blocks. | ||||||
|  |       expr: | | ||||||
|  |         increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="{{ $.Values.nfc_monitoring.prometheus.namespace }}"}[3h]) > 0 | ||||||
|  |       for: 4h | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Warning: the 5m rate of head-appended samples is <= 0 while the same instance | ||||||
|  |     # still reports target metadata cache entries or rule-group rules above 0. | ||||||
|  |     # The condition has to hold for 10m. | ||||||
|  |     - alert: PrometheusNotIngestingSamples | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} is not ingesting samples. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotingestingsamples | ||||||
|  |         summary: Prometheus is not ingesting samples. | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0 | ||||||
|  |         and | ||||||
|  |           ( | ||||||
|  |             sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="monitoring"}) > 0 | ||||||
|  |           or | ||||||
|  |             sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="monitoring"}) > 0 | ||||||
|  |           ) | ||||||
|  |         ) | ||||||
|  |       for: 10m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Warning: the 5m rate of prometheus_target_scrapes_sample_duplicate_timestamp_total | ||||||
|  |     # stays above 0 for 10m. | ||||||
|  |     - alert: PrometheusDuplicateTimestamps | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} is dropping {{ `{{` }} printf "%.4g" $value  }} samples/s with different values but duplicated timestamp. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps | ||||||
|  |         summary: Prometheus is dropping samples with duplicate timestamps. | ||||||
|  |       expr: | | ||||||
|  |         rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 | ||||||
|  |       for: 10m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Warning: the 5m rate of prometheus_target_scrapes_sample_out_of_order_total | ||||||
|  |     # stays above 0 for 10m. | ||||||
|  |     - alert: PrometheusOutOfOrderTimestamps | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} is dropping {{ `{{` }} printf "%.4g" $value  }} samples/s with timestamps arriving out of order. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps | ||||||
|  |         summary: Prometheus drops samples with out-of-order timestamps. | ||||||
|  |       expr: | | ||||||
|  |         rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 | ||||||
|  |       for: 10m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Critical: failed / (failed + succeeded) remote-storage sample rate, expressed | ||||||
|  |     # as a percentage, stays above 1 for 15m. Each operand joins two metric names | ||||||
|  |     # with "or", so whichever series exists supplies the value. | ||||||
|  |     # NOTE(review): presumably the two names cover different Prometheus versions - confirm. | ||||||
|  |     - alert: PrometheusRemoteStorageFailures | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} failed to send {{ `{{` }} printf "%.1f" $value }}% of the samples to {{ `{{` }} $labels.remote_name}}:{{ `{{` }} $labels.url }} | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotestoragefailures | ||||||
|  |         summary: Prometheus fails to send samples to remote storage. | ||||||
|  |       expr: | | ||||||
|  |         ( | ||||||
|  |           (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m])) | ||||||
|  |         / | ||||||
|  |           ( | ||||||
|  |             (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m])) | ||||||
|  |           + | ||||||
|  |             (rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m])) | ||||||
|  |           ) | ||||||
|  |         ) | ||||||
|  |         * 100 | ||||||
|  |         > 1 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     # Critical: the 5m maximum of the highest stored timestamp minus the 5m maximum | ||||||
|  |     # of the highest timestamp sent by a remote-write queue exceeds 120 (seconds) | ||||||
|  |     # for 15m. The subtraction ignores remote_name and url and uses group_right, | ||||||
|  |     # so the result keeps the per-queue labels. | ||||||
|  |     - alert: PrometheusRemoteWriteBehind | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} remote write is {{ `{{` }} printf "%.1f" $value }}s behind for {{ `{{` }} $labels.remote_name}}:{{ `{{` }} $labels.url }}. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritebehind | ||||||
|  |         summary: Prometheus remote write is behind. | ||||||
|  |       expr: | | ||||||
|  |         # Without max_over_time, failed scrapes could create false negatives, see | ||||||
|  |         # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. | ||||||
|  |         ( | ||||||
|  |           max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m]) | ||||||
|  |         - ignoring(remote_name, url) group_right | ||||||
|  |           max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m]) | ||||||
|  |         ) | ||||||
|  |         > 120 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     # Warning: the 5m maximum of desired remote-write shards is greater than the | ||||||
|  |     # 5m maximum of the configured shard ceiling for 15m. The description issues | ||||||
|  |     # its own query for prometheus_remote_storage_shards_max to print that ceiling. | ||||||
|  |     - alert: PrometheusRemoteWriteDesiredShards | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} remote write desired shards calculation wants to run {{ `{{` }} $value }} shards for queue {{ `{{` }} $labels.remote_name}}:{{ `{{` }} $labels.url }}, which is more than the max of {{ `{{` }} printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards | ||||||
|  |         summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. | ||||||
|  |       expr: | | ||||||
|  |         # Without max_over_time, failed scrapes could create false negatives, see | ||||||
|  |         # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. | ||||||
|  |         ( | ||||||
|  |           max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m]) | ||||||
|  |         > | ||||||
|  |           max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m]) | ||||||
|  |         ) | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Critical: prometheus_rule_evaluation_failures_total increased within the | ||||||
|  |     # trailing 5m window; the condition has to hold for 15m. | ||||||
|  |     - alert: PrometheusRuleFailures | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} has failed to evaluate {{ `{{` }} printf "%.0f" $value }} rules in the last 5m. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures | ||||||
|  |         summary: Prometheus is failing rule evaluations. | ||||||
|  |       expr: | | ||||||
|  |         increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     # Warning: prometheus_rule_group_iterations_missed_total increased within the | ||||||
|  |     # trailing 5m window; the condition has to hold for 15m. | ||||||
|  |     - alert: PrometheusMissingRuleEvaluations | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} has missed {{ `{{` }} printf "%.0f" $value }} rule group evaluations in the last 5m. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations | ||||||
|  |         summary: Prometheus is missing rule evaluations due to slow rule group evaluation. | ||||||
|  |       expr: | | ||||||
|  |         increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Warning: prometheus_target_scrape_pool_exceeded_target_limit_total increased | ||||||
|  |     # within the trailing 5m window; the condition has to hold for 15m. | ||||||
|  |     - alert: PrometheusTargetLimitHit | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} has dropped {{ `{{` }} printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit | ||||||
|  |         summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit. | ||||||
|  |       expr: | | ||||||
|  |         increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Warning: prometheus_target_scrape_pool_exceeded_label_limits_total increased | ||||||
|  |     # within the trailing 5m window; the condition has to hold for 15m. | ||||||
|  |     - alert: PrometheusLabelLimitHit | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} has dropped {{ `{{` }} printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit | ||||||
|  |         summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit. | ||||||
|  |       expr: | | ||||||
|  |         increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Warning: prometheus_target_scrapes_exceeded_body_size_limit_total increased | ||||||
|  |     # within the trailing 5m window; the condition has to hold for 15m. | ||||||
|  |     - alert: PrometheusScrapeBodySizeLimitHit | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} has failed {{ `{{` }} printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit | ||||||
|  |         summary: Prometheus has dropped some targets that exceeded body size limit. | ||||||
|  |       expr: | | ||||||
|  |         increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Warning: prometheus_target_scrapes_exceeded_sample_limit_total increased | ||||||
|  |     # within the trailing 5m window; the condition has to hold for 15m. | ||||||
|  |     - alert: PrometheusScrapeSampleLimitHit | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} has failed {{ `{{` }} printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit | ||||||
|  |         summary: Prometheus has failed scrapes that have exceeded the configured sample limit. | ||||||
|  |       expr: | | ||||||
|  |         increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Critical: prometheus_target_sync_failed_total increased within the trailing | ||||||
|  |     # 30m window; the condition has to hold for 5m. The description is | ||||||
|  |     # single-quoted because the rendered text begins with a template action. | ||||||
|  |     - alert: PrometheusTargetSyncFailure | ||||||
|  |       annotations: | ||||||
|  |         description: '{{ `{{` }} printf "%.0f" $value }} targets in Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} have failed to sync because invalid configuration was supplied.' | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure | ||||||
|  |         summary: Prometheus has failed to sync targets. | ||||||
|  |       expr: | | ||||||
|  |         increase(prometheus_target_sync_failed_total{job="prometheus-k8s",namespace="monitoring"}[30m]) > 0 | ||||||
|  |       for: 5m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
|  |     # Warning: the 5m average of prometheus_engine_queries divided by the 5m maximum | ||||||
|  |     # of prometheus_engine_queries_concurrent_max stays above 0.8 for 15m (the | ||||||
|  |     # "less than 20% available capacity" in the description). | ||||||
|  |     - alert: PrometheusHighQueryLoad | ||||||
|  |       annotations: | ||||||
|  |         description: Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} query API has less than 20% available capacity in its query engine for the last 15 minutes. | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheushighqueryload | ||||||
|  |         summary: Prometheus is reaching its maximum capacity serving concurrent requests. | ||||||
|  |       expr: | | ||||||
|  |         avg_over_time(prometheus_engine_queries{job="prometheus-k8s",namespace="monitoring"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.8 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: warning | ||||||
|  |     # Critical: per-Alertmanager ratio of notification errors to notifications sent | ||||||
|  |     # (5m rates), reduced with min across the alertmanager label and scaled to a | ||||||
|  |     # percentage, stays above 3 for 15m. Taking the minimum means the alert fires | ||||||
|  |     # only when even the best-performing Alertmanager exceeds the threshold. | ||||||
|  |     - alert: PrometheusErrorSendingAlertsToAnyAlertmanager | ||||||
|  |       annotations: | ||||||
|  |         description: '{{ `{{` }} printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{ `{{` }}$labels.namespace}}/{{ `{{` }}$labels.pod}} to any Alertmanager.' | ||||||
|  |         runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstoanyalertmanager | ||||||
|  |         summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager. | ||||||
|  |       expr: | | ||||||
|  |         min without (alertmanager) ( | ||||||
|  |           rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m]) | ||||||
|  |         / | ||||||
|  |           rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m]) | ||||||
|  |         ) | ||||||
|  |         * 100 | ||||||
|  |         > 3 | ||||||
|  |       for: 15m | ||||||
|  |       labels: | ||||||
|  |         severity: critical | ||||||
							
								
								
									
										47
									
								
								templates/Role-SpecificNamespaces-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								templates/Role-SpecificNamespaces-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,47 @@ | |||||||
|  | --- | ||||||
|  | # RoleList holding one namespaced Role per entry of | ||||||
|  | # nfc_monitoring.prometheus.monitor_namespaces. Every Role is named | ||||||
|  | # prometheus-k8s and grants get/list/watch on services, endpoints, pods and | ||||||
|  | # ingresses inside that namespace. | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | items: | ||||||
|  | {{- range .Values.nfc_monitoring.prometheus.monitor_namespaces }} | ||||||
|  | - apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  |   kind: Role | ||||||
|  |   metadata: | ||||||
|  |     labels: | ||||||
|  |       app.kubernetes.io/component: prometheus | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: prometheus | ||||||
|  |       # Templated values are quoted so that a number-looking chart version | ||||||
|  |       # (for example 1.0) is still rendered as a string label value. | ||||||
|  |       app.kubernetes.io/part-of: {{ $.Chart.Name | quote }} | ||||||
|  |       app.kubernetes.io/managed-by: {{ $.Release.Service | quote }} | ||||||
|  |       app.kubernetes.io/version: {{ $.Chart.Version | quote }} | ||||||
|  |     name: prometheus-k8s | ||||||
|  |     # Inside the range "." is the current namespace name. | ||||||
|  |     namespace: {{ . | quote }} | ||||||
|  |   rules: | ||||||
|  |   - apiGroups: | ||||||
|  |     - "" | ||||||
|  |     resources: | ||||||
|  |     - services | ||||||
|  |     - endpoints | ||||||
|  |     - pods | ||||||
|  |     verbs: | ||||||
|  |     - get | ||||||
|  |     - list | ||||||
|  |     - watch | ||||||
|  |   - apiGroups: | ||||||
|  |     - extensions | ||||||
|  |     resources: | ||||||
|  |     - ingresses | ||||||
|  |     verbs: | ||||||
|  |     - get | ||||||
|  |     - list | ||||||
|  |     - watch | ||||||
|  |   - apiGroups: | ||||||
|  |     - networking.k8s.io | ||||||
|  |     resources: | ||||||
|  |     - ingresses | ||||||
|  |     verbs: | ||||||
|  |     - get | ||||||
|  |     - list | ||||||
|  |     - watch | ||||||
|  | {{- end }} | ||||||
|  | kind: RoleList | ||||||
							
								
								
									
										21
									
								
								templates/RoleBinding-Config-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								templates/RoleBinding-Config-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,21 @@ | |||||||
|  | --- | ||||||
|  | # Binds the prometheus-k8s-config Role to the prometheus-k8s ServiceAccount. | ||||||
|  | # Both the binding and the subject live in the namespace taken from | ||||||
|  | # nfc_monitoring.prometheus.namespace. | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | kind: RoleBinding | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     # Templated scalars are quoted so number- or boolean-looking values still | ||||||
|  |     # render as strings. | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name | quote }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service | quote }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version | quote }} | ||||||
|  |   name: prometheus-k8s-config | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.prometheus.namespace | quote }} | ||||||
|  | roleRef: | ||||||
|  |   apiGroup: rbac.authorization.k8s.io | ||||||
|  |   kind: Role | ||||||
|  |   name: prometheus-k8s-config | ||||||
|  | subjects: | ||||||
|  | - kind: ServiceAccount | ||||||
|  |   name: prometheus-k8s | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.prometheus.namespace | quote }} | ||||||
							
								
								
									
										27
									
								
								templates/RoleBinding-SpecificNamespaces-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								templates/RoleBinding-SpecificNamespaces-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,27 @@ | |||||||
|  | --- | ||||||
|  | # RoleBindingList holding one RoleBinding per entry of | ||||||
|  | # nfc_monitoring.prometheus.monitor_namespaces. Each binding attaches the | ||||||
|  | # prometheus-k8s Role of that namespace to the prometheus-k8s ServiceAccount in | ||||||
|  | # the namespace given by nfc_monitoring.prometheus.namespace. | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | items: | ||||||
|  | {{- range .Values.nfc_monitoring.prometheus.monitor_namespaces }} | ||||||
|  | - apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  |   kind: RoleBinding | ||||||
|  |   metadata: | ||||||
|  |     labels: | ||||||
|  |       app.kubernetes.io/component: prometheus | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: prometheus | ||||||
|  |       # Templated scalars are quoted so number- or boolean-looking values | ||||||
|  |       # still render as strings. | ||||||
|  |       app.kubernetes.io/part-of: {{ $.Chart.Name | quote }} | ||||||
|  |       app.kubernetes.io/managed-by: {{ $.Release.Service | quote }} | ||||||
|  |       app.kubernetes.io/version: {{ $.Chart.Version | quote }} | ||||||
|  |     name: prometheus-k8s | ||||||
|  |     # Inside the range "." is the current namespace name. | ||||||
|  |     namespace: {{ . | quote }} | ||||||
|  |   roleRef: | ||||||
|  |     apiGroup: rbac.authorization.k8s.io | ||||||
|  |     kind: Role | ||||||
|  |     name: prometheus-k8s | ||||||
|  |   subjects: | ||||||
|  |   - kind: ServiceAccount | ||||||
|  |     name: prometheus-k8s | ||||||
|  |     # "$" reaches the root context because "." is rebound by the range. | ||||||
|  |     namespace: {{ $.Values.nfc_monitoring.prometheus.namespace | quote }} | ||||||
|  | {{- end }} | ||||||
|  | kind: RoleBindingList | ||||||
							
								
								
									
										18
									
								
								templates/RoleBinding-prometheus-adapter-auth-reader.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								templates/RoleBinding-prometheus-adapter-auth-reader.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | |||||||
|  | --- | ||||||
|  | # Binds the extension-apiserver-authentication-reader Role in kube-system to | ||||||
|  | # the prometheus-adapter ServiceAccount. | ||||||
|  | # NOTE(review): the subject namespace and the version label are literal values | ||||||
|  | # here, unlike the templated namespaces/labels used by the Prometheus RBAC | ||||||
|  | # templates - confirm this is intended. | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | kind: RoleBinding | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/version: 0.11.1 | ||||||
|  |   name: resource-metrics-auth-reader | ||||||
|  |   namespace: kube-system | ||||||
|  | roleRef: | ||||||
|  |   apiGroup: rbac.authorization.k8s.io | ||||||
|  |   kind: Role | ||||||
|  |   name: extension-apiserver-authentication-reader | ||||||
|  | subjects: | ||||||
|  | - kind: ServiceAccount | ||||||
|  |   name: prometheus-adapter | ||||||
|  |   namespace: monitoring | ||||||
							
								
								
									
										20
									
								
								templates/RoleConfig-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								templates/RoleConfig-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,20 @@ | |||||||
|  | --- | ||||||
|  | # Role prometheus-k8s-config: allows "get" on ConfigMaps in the namespace | ||||||
|  | # taken from nfc_monitoring.prometheus.namespace. | ||||||
|  | apiVersion: rbac.authorization.k8s.io/v1 | ||||||
|  | kind: Role | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     # Templated scalars are quoted so number- or boolean-looking values still | ||||||
|  |     # render as strings. | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name | quote }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service | quote }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version | quote }} | ||||||
|  |   name: prometheus-k8s-config | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.prometheus.namespace | quote }} | ||||||
|  | rules: | ||||||
|  | - apiGroups: | ||||||
|  |   - "" | ||||||
|  |   resources: | ||||||
|  |   - configmaps | ||||||
|  |   verbs: | ||||||
|  |   - get | ||||||
							
								
								
									
										61
									
								
								templates/Secret-alertmanager.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								templates/Secret-alertmanager.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,61 @@ | |||||||
|  | --- | ||||||
|  | # Secret alertmanager-main: carries the Alertmanager configuration as the | ||||||
|  | # alertmanager.yaml key. The configuration defines three inhibit rules, four | ||||||
|  | # receivers without integrations (Default, Watchdog, Critical, null) and a | ||||||
|  | # route tree that groups by namespace. | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: Secret | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: alert-router | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/name: alertmanager | ||||||
|  |     # Templated scalars are quoted so number- or boolean-looking values still | ||||||
|  |     # render as strings. | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name | quote }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service | quote }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version | quote }} | ||||||
|  |   name: alertmanager-main | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.alert_manager.namespace | quote }} | ||||||
|  | stringData: | ||||||
|  |   alertmanager.yaml: |- | ||||||
|  |     "global": | ||||||
|  |       "resolve_timeout": "5m" | ||||||
|  |     "inhibit_rules": | ||||||
|  |     - "equal": | ||||||
|  |       - "namespace" | ||||||
|  |       - "alertname" | ||||||
|  |       "source_matchers": | ||||||
|  |       - "severity = critical" | ||||||
|  |       "target_matchers": | ||||||
|  |       - "severity =~ warning|info" | ||||||
|  |     - "equal": | ||||||
|  |       - "namespace" | ||||||
|  |       - "alertname" | ||||||
|  |       "source_matchers": | ||||||
|  |       - "severity = warning" | ||||||
|  |       "target_matchers": | ||||||
|  |       - "severity = info" | ||||||
|  |     - "equal": | ||||||
|  |       - "namespace" | ||||||
|  |       "source_matchers": | ||||||
|  |       - "alertname = InfoInhibitor" | ||||||
|  |       "target_matchers": | ||||||
|  |       - "severity = info" | ||||||
|  |     "receivers": | ||||||
|  |     - "name": "Default" | ||||||
|  |     - "name": "Watchdog" | ||||||
|  |     - "name": "Critical" | ||||||
|  |     - "name": "null" | ||||||
|  |     "route": | ||||||
|  |       "group_by": | ||||||
|  |       - "namespace" | ||||||
|  |       "group_interval": "5m" | ||||||
|  |       "group_wait": "30s" | ||||||
|  |       "receiver": "Default" | ||||||
|  |       "repeat_interval": "12h" | ||||||
|  |       "routes": | ||||||
|  |       - "matchers": | ||||||
|  |         - "alertname = Watchdog" | ||||||
|  |         "receiver": "Watchdog" | ||||||
|  |       - "matchers": | ||||||
|  |         - "alertname = InfoInhibitor" | ||||||
|  |         "receiver": "null" | ||||||
|  |       - "matchers": | ||||||
|  |         - "severity = critical" | ||||||
|  |         "receiver": "Critical" | ||||||
|  | type: Opaque | ||||||
							
								
								
									
										13
									
								
								templates/Secret-prometheus-sidecar-thanos.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								templates/Secret-prometheus-sidecar-thanos.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | |||||||
|  | {{ if .Values.nfc_monitoring.thanos.sidecar.enabled }} | ||||||
|  | --- | ||||||
|  | # Secret thanos-sidecar-config: rendered only when | ||||||
|  | # nfc_monitoring.thanos.sidecar.enabled is true. The thanos.yaml key holds | ||||||
|  | # nfc_monitoring.thanos.sidecar.config serialised as YAML. | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: Secret | ||||||
|  | metadata: | ||||||
|  |   name: thanos-sidecar-config | ||||||
|  |   # Follows the configurable Prometheus namespace instead of a literal value. | ||||||
|  |   # NOTE(review): assumes the Prometheus resource that consumes this Secret is | ||||||
|  |   # deployed to nfc_monitoring.prometheus.namespace - confirm. | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.prometheus.namespace | quote }} | ||||||
|  | type: Opaque | ||||||
|  | stringData: | ||||||
|  |   # The left-trimmed action removes the line break after the block-scalar | ||||||
|  |   # header; nindent supplies the newline and the 4-space indent itself, so the | ||||||
|  |   # scalar no longer starts with a whitespace-only line. | ||||||
|  |   thanos.yaml: |- | ||||||
|  |     {{- toYaml .Values.nfc_monitoring.thanos.sidecar.config | nindent 4 }} | ||||||
|  | {{ end }} | ||||||
							
								
								
									
										20
									
								
								templates/Service-CalicoMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								templates/Service-CalicoMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,20 @@ | |||||||
|  | {{- if eq .Values.nfc_monitoring.kubernetes.networking "calico" }} | ||||||
|  | --- | ||||||
|  | # Headless Service calico-metrics in kube-system: selects pods labelled | ||||||
|  | # k8s-app=calico-node and exposes port 9091 under the name "metrics". | ||||||
|  | # Rendered only when nfc_monitoring.kubernetes.networking equals "calico". | ||||||
|  | # The document marker sits inside the conditional and the "if" action is not | ||||||
|  | # right-trimmed, so "---" keeps its own line instead of being joined to | ||||||
|  | # apiVersion by whitespace trimming. | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: Service | ||||||
|  | metadata: | ||||||
|  |   name: calico-metrics | ||||||
|  |   namespace: kube-system | ||||||
|  |   labels: | ||||||
|  |     k8s-app: calico-node | ||||||
|  | spec: | ||||||
|  |   clusterIP: None | ||||||
|  |   selector: | ||||||
|  |     k8s-app: calico-node | ||||||
|  |   ports: | ||||||
|  |   - port: 9091 | ||||||
|  |     name: metrics | ||||||
|  |     targetPort: 9091 | ||||||
|  | {{- end }} | ||||||
							
								
								
									
										30
									
								
								templates/Service-Grafana.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								templates/Service-Grafana.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,30 @@ | |||||||
|  | --- | ||||||
|  | # Service grafana: exposes port 3000 and forwards to the container port named | ||||||
|  | # grafana-http on pods matching the selector below, with ClientIP session | ||||||
|  | # affinity. The namespace comes from nfc_monitoring.grafana.namespace. | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: Service | ||||||
|  | metadata: | ||||||
|  |   name: grafana | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.grafana.namespace | quote }} | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: graphing | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: grafana | ||||||
|  |     # Templated scalars are quoted so number- or boolean-looking values still | ||||||
|  |     # render as strings. | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service | quote }} | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name | quote }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version | quote }} | ||||||
|  | spec: | ||||||
|  |   selector: | ||||||
|  |     app.kubernetes.io/component: graphing | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: grafana | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name | quote }} | ||||||
|  |   #type: NodePort | ||||||
|  |   #type: LoadBalancer | ||||||
|  |   #clusterIP: None | ||||||
|  |   ports: | ||||||
|  |     - name: grafana | ||||||
|  |       port: 3000 | ||||||
|  |       targetPort: grafana-http | ||||||
|  |       #nodePort: 3000 | ||||||
|  |   #type: LoadBalancer | ||||||
|  |   sessionAffinity: ClientIP | ||||||
							
								
								
									
										28
									
								
								templates/Service-GrafanaAgent.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								templates/Service-GrafanaAgent.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,28 @@ | |||||||
|  | --- | ||||||
|  | # Service grafana-agent: exposes ports 12345 (grafana-metrics) and 11257 | ||||||
|  | # (kube-ctrl-mgr), each forwarded to the same-named container port, with | ||||||
|  | # ClientIP session affinity. | ||||||
|  | # NOTE(review): the namespace is the literal "monitoring" while other | ||||||
|  | # templates in this chart read it from values - confirm this is intended. | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: Service | ||||||
|  | metadata: | ||||||
|  |   name: grafana-agent | ||||||
|  |   namespace: monitoring | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/name: grafana-agent | ||||||
|  |     # Templated scalars are quoted so number- or boolean-looking values still | ||||||
|  |     # render as strings. | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service | quote }} | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name | quote }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version | quote }} | ||||||
|  | spec: | ||||||
|  |   selector: | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/name: grafana-agent | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name | quote }} | ||||||
|  |   ports: | ||||||
|  |     - name: grafana-metrics | ||||||
|  |       port: 12345 | ||||||
|  |       targetPort: grafana-metrics | ||||||
|  |     - name: kube-ctrl-mgr | ||||||
|  |       port: 11257 | ||||||
|  |       targetPort: kube-ctrl-mgr | ||||||
|  |   #type: LoadBalancer | ||||||
|  |   sessionAffinity: ClientIP | ||||||
							
								
								
									
										27
									
								
								templates/Service-alertmanager.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								templates/Service-alertmanager.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,27 @@ | |||||||
|  | --- | ||||||
|  | # Service alertmanager-main: exposes ports 9093 (web) and 8080 (reloader-web), | ||||||
|  | # each forwarded to the same-named container port, with ClientIP session | ||||||
|  | # affinity. The namespace comes from nfc_monitoring.alert_manager.namespace. | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: Service | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: alert-router | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/name: alertmanager | ||||||
|  |     # Templated scalars are quoted so number- or boolean-looking values still | ||||||
|  |     # render as strings. | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name | quote }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service | quote }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version | quote }} | ||||||
|  |   name: alertmanager-main | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.alert_manager.namespace | quote }} | ||||||
|  | spec: | ||||||
|  |   ports: | ||||||
|  |   - name: web | ||||||
|  |     port: 9093 | ||||||
|  |     targetPort: web | ||||||
|  |   - name: reloader-web | ||||||
|  |     port: 8080 | ||||||
|  |     targetPort: reloader-web | ||||||
|  |   selector: | ||||||
|  |     app.kubernetes.io/component: alert-router | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/name: alertmanager | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name | quote }} | ||||||
|  |   sessionAffinity: ClientIP | ||||||
							
								
								
									
										30
									
								
								templates/Service-kube-monitor-proxy.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								templates/Service-kube-monitor-proxy.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,30 @@ | |||||||
|  | --- | ||||||
|  | # Service "kube-monitor-proxy": publishes the named ports "kube-ctrl-mgr" | ||||||
|  | # (10257) and "kube-scheduler" (10259) of the proxy pods selected below. | ||||||
|  | kind: Service | ||||||
|  | apiVersion: v1 | ||||||
|  | metadata: | ||||||
|  |   namespace: monitoring | ||||||
|  |   name: kube-monitor-proxy | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: kube-monitor-proxy | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/component: proxy | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     # Extra labels naming the two components reached through this Service. | ||||||
|  |     name_kcm: kube-controller-manager | ||||||
|  |     name_ks: kube-scheduler | ||||||
|  | spec: | ||||||
|  |   # Session affinity is keyed on the client IP. | ||||||
|  |   sessionAffinity: ClientIP | ||||||
|  |   ports: | ||||||
|  |     - port: 10257 | ||||||
|  |       name: kube-ctrl-mgr | ||||||
|  |       targetPort: kube-ctrl-mgr | ||||||
|  |     - port: 10259 | ||||||
|  |       name: kube-scheduler | ||||||
|  |       targetPort: kube-scheduler | ||||||
|  |   selector: | ||||||
|  |     app.kubernetes.io/name: kube-monitor-proxy | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/component: proxy | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
							
								
								
									
										28
									
								
								templates/Service-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								templates/Service-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,28 @@ | |||||||
|  | --- | ||||||
|  | # Service "kube-state-metrics": headless (clusterIP: None) Service for the | ||||||
|  | # named ports "https-main" (8443) and "https-self" (9443). | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: Service | ||||||
|  | metadata: | ||||||
|  |   name: kube-state-metrics | ||||||
|  |   namespace: monitoring | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: kube-state-metrics | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  | spec: | ||||||
|  |   clusterIP: None | ||||||
|  |   selector: | ||||||
|  |     app.kubernetes.io/name: kube-state-metrics | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |   ports: | ||||||
|  |     - name: https-main | ||||||
|  |       port: 8443 | ||||||
|  |       targetPort: https-main | ||||||
|  |     - name: https-self | ||||||
|  |       port: 9443 | ||||||
|  |       targetPort: https-self | ||||||
|  |  | ||||||
							
								
								
									
										23
									
								
								templates/Service-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								templates/Service-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,23 @@ | |||||||
|  | --- | ||||||
|  | # Service "prometheus-adapter": forwards Service port 443 ("https") to | ||||||
|  | # port 6443 on the pods selected below. | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: Service | ||||||
|  | metadata: | ||||||
|  |   name: prometheus-adapter | ||||||
|  |   namespace: monitoring | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  | spec: | ||||||
|  |   selector: | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |   ports: | ||||||
|  |     - name: https | ||||||
|  |       port: 443 | ||||||
|  |       targetPort: 6443 | ||||||
							
								
								
									
										59
									
								
								templates/Service-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								templates/Service-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,59 @@ | |||||||
|  | --- | ||||||
|  | # Service "prometheus-k8s": publishes the named ports "web" (9090) and | ||||||
|  | # "reloader-web" (8080) of the Prometheus pods selected below. | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: Service | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: prometheus-k8s | ||||||
|  |   # Piped through quote so the rendered namespace is always a YAML string, | ||||||
|  |   # even when the configured value is empty or looks like a number/boolean. | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.prometheus.namespace | quote }} | ||||||
|  | spec: | ||||||
|  |   ports: | ||||||
|  |   # targetPort refers to the container port by name, not by number. | ||||||
|  |   - name: web | ||||||
|  |     port: 9090 | ||||||
|  |     targetPort: web | ||||||
|  |   - name: reloader-web | ||||||
|  |     port: 8080 | ||||||
|  |     targetPort: reloader-web | ||||||
|  |   selector: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |   # Session affinity is keyed on the client IP. | ||||||
|  |   sessionAffinity: ClientIP | ||||||
|  |  | ||||||
|  | {{ if .Values.nfc_monitoring.thanos.sidecar.enabled }} | ||||||
|  | --- | ||||||
|  | # Headless Service "thanos-sidecar" (clusterIP: None), rendered only when | ||||||
|  | # nfc_monitoring.thanos.sidecar.enabled is truthy. Publishes "grpc" (10901) | ||||||
|  | # and "http" (10902). | ||||||
|  | apiVersion: v1 | ||||||
|  | kind: Service | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: prometheus-sidecar | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: thanos-sidecar | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: thanos-sidecar | ||||||
|  |   # Piped through quote so the rendered namespace is always a YAML string, | ||||||
|  |   # even when the configured value is empty or looks like a number/boolean. | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.prometheus.namespace | quote }} | ||||||
|  | spec: | ||||||
|  |   clusterIP: None | ||||||
|  |   ports: | ||||||
|  |   - name: grpc | ||||||
|  |     port: 10901 | ||||||
|  |     targetPort: 10901 | ||||||
|  |   - name: http | ||||||
|  |     port: 10902 | ||||||
|  |     targetPort: 10902 | ||||||
|  |   # The selector uses the Prometheus pod labels rather than this Service's | ||||||
|  |   # own thanos-sidecar labels; presumably the sidecar container runs inside | ||||||
|  |   # the Prometheus pods - TODO confirm. | ||||||
|  |   selector: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |  | ||||||
|  | {{ end }} | ||||||
							
								
								
									
										14
									
								
								templates/ServiceAccount-Grafana.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								templates/ServiceAccount-Grafana.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | |||||||
|  | --- | ||||||
|  | # ServiceAccount "grafana". Automatic mounting of the account's API token | ||||||
|  | # is switched off (automountServiceAccountToken: false). | ||||||
|  | apiVersion: v1 | ||||||
|  | automountServiceAccountToken: false | ||||||
|  | kind: ServiceAccount | ||||||
|  | metadata: | ||||||
|  |   name: grafana | ||||||
|  |   # Piped through quote so the rendered namespace is always a YAML string, | ||||||
|  |   # even when the configured value is empty or looks like a number/boolean. | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.grafana.namespace | quote }} | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: graphing | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: grafana | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
							
								
								
									
										14
									
								
								templates/ServiceAccount-GrafanaAgent.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								templates/ServiceAccount-GrafanaAgent.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | |||||||
|  | --- | ||||||
|  | # ServiceAccount "grafana-agent" in the "monitoring" namespace. Automatic | ||||||
|  | # mounting of the account's API token is switched off. | ||||||
|  | kind: ServiceAccount | ||||||
|  | apiVersion: v1 | ||||||
|  | metadata: | ||||||
|  |   namespace: monitoring | ||||||
|  |   name: grafana-agent | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: grafana-agent | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  | automountServiceAccountToken: false | ||||||
							
								
								
									
										14
									
								
								templates/ServiceAccount-alertmanager.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								templates/ServiceAccount-alertmanager.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | |||||||
|  | --- | ||||||
|  | # ServiceAccount "alertmanager-main". Automatic mounting of the account's | ||||||
|  | # API token is switched off. | ||||||
|  | kind: ServiceAccount | ||||||
|  | apiVersion: v1 | ||||||
|  | metadata: | ||||||
|  |   name: alertmanager-main | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.alert_manager.namespace | quote }} | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: alertmanager | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/component: alert-router | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  | automountServiceAccountToken: false | ||||||
							
								
								
									
										13
									
								
								templates/ServiceAccount-kube-monitor-proxy.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								templates/ServiceAccount-kube-monitor-proxy.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | |||||||
|  | --- | ||||||
|  | # ServiceAccount "kube-monitor-proxy" in the "monitoring" namespace. | ||||||
|  | # automountServiceAccountToken is not set on this account. | ||||||
|  | kind: ServiceAccount | ||||||
|  | apiVersion: v1 | ||||||
|  | metadata: | ||||||
|  |   namespace: monitoring | ||||||
|  |   name: kube-monitor-proxy | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: kube-monitor-proxy | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/component: proxy | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
							
								
								
									
										14
									
								
								templates/ServiceAccount-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								templates/ServiceAccount-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | |||||||
|  | --- | ||||||
|  | # ServiceAccount "kube-state-metrics" in the "monitoring" namespace. | ||||||
|  | # Automatic mounting of the account's API token is switched off. | ||||||
|  | kind: ServiceAccount | ||||||
|  | apiVersion: v1 | ||||||
|  | metadata: | ||||||
|  |   name: kube-state-metrics | ||||||
|  |   namespace: monitoring | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: kube-state-metrics | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  | automountServiceAccountToken: false | ||||||
							
								
								
									
										14
									
								
								templates/ServiceAccount-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								templates/ServiceAccount-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | |||||||
|  | --- | ||||||
|  | # ServiceAccount "prometheus-adapter" in the "monitoring" namespace. | ||||||
|  | # Automatic mounting of the account's API token is switched off. | ||||||
|  | kind: ServiceAccount | ||||||
|  | apiVersion: v1 | ||||||
|  | metadata: | ||||||
|  |   name: prometheus-adapter | ||||||
|  |   namespace: monitoring | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  | automountServiceAccountToken: false | ||||||
							
								
								
									
										14
									
								
								templates/ServiceAccount-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								templates/ServiceAccount-prometheus.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | |||||||
|  | --- | ||||||
|  | # ServiceAccount "prometheus-k8s". Unlike the other ServiceAccounts in this | ||||||
|  | # chart section, automatic mounting of the API token is enabled here. | ||||||
|  | apiVersion: v1 | ||||||
|  | automountServiceAccountToken: true | ||||||
|  | kind: ServiceAccount | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: prometheus | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: prometheus | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: prometheus-k8s | ||||||
|  |   # Piped through quote so the rendered namespace is always a YAML string, | ||||||
|  |   # even when the configured value is empty or looks like a number/boolean. | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.prometheus.namespace | quote }} | ||||||
							
								
								
									
										78
									
								
								templates/ServiceMonitor-APIServer.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								templates/ServiceMonitor-APIServer.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,78 @@ | |||||||
|  | --- | ||||||
|  | # ServiceMonitor "kube-apiserver": scrapes the "https" port of Services in | ||||||
|  | # the "default" namespace labelled component=apiserver, provider=kubernetes, | ||||||
|  | # every 30s, then drops the metric families listed under metricRelabelings. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: apiserver | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |   name: kube-apiserver | ||||||
|  |   namespace: monitoring | ||||||
|  | spec: | ||||||
|  |   endpoints: | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     interval: 30s | ||||||
|  |     # Each rule below drops every series whose metric name matches its regex. | ||||||
|  |     metricRelabelings: | ||||||
|  |     - action: drop | ||||||
|  |       regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs|longrunning_gauge|registered_watchers) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: transformation_(transformation_latencies_microseconds|failures_total) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEs
tablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: etcd_(debugging|disk|server).* | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: apiserver_admission_controller_admission_latencies_seconds_.* | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: apiserver_admission_step_admission_latencies_seconds_.* | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     # This rule matches on two labels (metric name and "le"), so it drops | ||||||
|  |     # only the listed request-duration histogram buckets. No separator is | ||||||
|  |     # set here; the ";" in the regex is presumably the default join | ||||||
|  |     # character - TODO confirm. | ||||||
|  |     - action: drop | ||||||
|  |       regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |       - le | ||||||
|  |     port: https | ||||||
|  |     scheme: https | ||||||
|  |     tlsConfig: | ||||||
|  |       caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt | ||||||
|  |       serverName: kubernetes | ||||||
|  |   # The scrape job name is taken from the target Service's "component" label. | ||||||
|  |   jobLabel: component | ||||||
|  |   namespaceSelector: | ||||||
|  |     matchNames: | ||||||
|  |     - default | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       component: apiserver | ||||||
|  |       provider: kubernetes | ||||||
							
								
								
									
										52
									
								
								templates/ServiceMonitor-Cadvisor.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								templates/ServiceMonitor-Cadvisor.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,52 @@ | |||||||
|  | --- | ||||||
|  | # ServiceMonitor "cadvisor": scrapes /metrics/cadvisor over HTTPS from the | ||||||
|  | # "https-metrics" port of Services labelled app.kubernetes.io/name=kubelet | ||||||
|  | # in the kube-system namespace, every 30s. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   name: cadvisor | ||||||
|  |   namespace: monitoring | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: cadvisor | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  | spec: | ||||||
|  |   namespaceSelector: | ||||||
|  |     matchNames: | ||||||
|  |       - kube-system | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/name: kubelet | ||||||
|  |   endpoints: | ||||||
|  |     - port: https-metrics | ||||||
|  |       scheme: https | ||||||
|  |       path: /metrics/cadvisor | ||||||
|  |       interval: 30s | ||||||
|  |       honorLabels: true | ||||||
|  |       honorTimestamps: false | ||||||
|  |       bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |       tlsConfig: | ||||||
|  |         # Certificate verification is switched off for this endpoint. | ||||||
|  |         insecureSkipVerify: true | ||||||
|  |       relabelings: | ||||||
|  |         # Copy the scrape path into a "metrics_path" label. | ||||||
|  |         - sourceLabels: | ||||||
|  |             - __metrics_path__ | ||||||
|  |           targetLabel: metrics_path | ||||||
|  |         # jobName stays disabled; the "job" label is set to "cadvisor" here. | ||||||
|  |         - targetLabel: "job" | ||||||
|  |           replacement: "cadvisor" | ||||||
|  |       metricRelabelings: | ||||||
|  |         # Rule order is kept exactly as before; every rule drops series. | ||||||
|  |         - action: drop | ||||||
|  |           regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) | ||||||
|  |           sourceLabels: | ||||||
|  |             - __name__ | ||||||
|  |         # Matches on metric name, pod and namespace together. | ||||||
|  |         - action: drop | ||||||
|  |           regex: (container_spec_.*|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);; | ||||||
|  |           sourceLabels: | ||||||
|  |             - __name__ | ||||||
|  |             - pod | ||||||
|  |             - namespace | ||||||
|  |         # Matches on metric name plus a non-empty container label. | ||||||
|  |         - action: drop | ||||||
|  |           regex: (container_blkio_device_usage_total);.+ | ||||||
|  |           sourceLabels: | ||||||
|  |             - __name__ | ||||||
|  |             - container | ||||||
							
								
								
									
										36
									
								
								templates/ServiceMonitor-Calico.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								templates/ServiceMonitor-Calico.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,36 @@ | |||||||
|  | {{- if eq .Values.nfc_monitoring.kubernetes.networking "calico" }} | ||||||
|  | --- | ||||||
|  | # ServiceMonitor "calico": rendered only when | ||||||
|  | # nfc_monitoring.kubernetes.networking equals "calico". Scrapes the | ||||||
|  | # "metrics" port of Services labelled k8s-app=calico-node over plain HTTP | ||||||
|  | # every 15s. | ||||||
|  | # | ||||||
|  | # The document marker sits inside the conditional, and the opening and | ||||||
|  | # closing template actions trim whitespace on their left side only. With | ||||||
|  | # trimming on both sides the marker was rendered glued to the apiVersion | ||||||
|  | # key on a single line. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: calico | ||||||
|  |     app.kubernetes.io/component: networking | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     k8s-app: calico-node | ||||||
|  |   name: calico | ||||||
|  |   namespace: kube-system | ||||||
|  | spec: | ||||||
|  |   endpoints: | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     interval: 15s | ||||||
|  |     port: metrics | ||||||
|  |     relabelings: | ||||||
|  |     # Use the node name of the scraped pod as the "instance" label. | ||||||
|  |     - action: replace | ||||||
|  |       regex: (.*) | ||||||
|  |       replacement: $1 | ||||||
|  |       sourceLabels: | ||||||
|  |       - __meta_kubernetes_pod_node_name | ||||||
|  |       targetLabel: instance | ||||||
|  |     scheme: http | ||||||
|  |     # tlsConfig: | ||||||
|  |     #   insecureSkipVerify: true | ||||||
|  |   jobLabel: app.kubernetes.io/name | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       k8s-app: calico-node | ||||||
|  | {{- end }} | ||||||
							
								
								
									
										28
									
								
								templates/ServiceMonitor-CoreDNS.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								templates/ServiceMonitor-CoreDNS.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,28 @@ | |||||||
|  | --- | ||||||
|  | # ServiceMonitor "coredns": scrapes the "metrics" port of Services labelled | ||||||
|  | # k8s-app=kube-dns in the kube-system namespace every 15s. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   name: coredns | ||||||
|  |   namespace: monitoring | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: coredns | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  | spec: | ||||||
|  |   jobLabel: app.kubernetes.io/name | ||||||
|  |   namespaceSelector: | ||||||
|  |     matchNames: | ||||||
|  |       - kube-system | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       k8s-app: kube-dns | ||||||
|  |   endpoints: | ||||||
|  |     - port: metrics | ||||||
|  |       interval: 15s | ||||||
|  |       bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |       metricRelabelings: | ||||||
|  |         # Series named coredns_cache_misses_total are dropped. | ||||||
|  |         - action: drop | ||||||
|  |           regex: coredns_cache_misses_total | ||||||
|  |           sourceLabels: | ||||||
|  |             - __name__ | ||||||
							
								
								
									
										37
									
								
								templates/ServiceMonitor-Grafana.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								templates/ServiceMonitor-Grafana.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,37 @@ | |||||||
|  | --- | ||||||
|  | # ServiceMonitor "grafana": scrapes the "grafana" port of the Services | ||||||
|  | # matched by the selector below over plain HTTP every 15s. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: graphing | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: grafana | ||||||
|  |     app.kubernetes.io/managed-by: {{ $.Release.Service }} | ||||||
|  |     app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
|  |     app.kubernetes.io/version: {{ $.Chart.Version }} | ||||||
|  |   name: grafana | ||||||
|  |   # Piped through quote so the rendered namespace is always a YAML string, | ||||||
|  |   # even when the configured value is empty or looks like a number/boolean. | ||||||
|  |   namespace: {{ .Values.nfc_monitoring.grafana.namespace | quote }} | ||||||
|  | spec: | ||||||
|  |   endpoints: | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     interval: 15s | ||||||
|  |     port: grafana | ||||||
|  |     relabelings: | ||||||
|  |     # Use the pod's app.kubernetes.io/instance label as the "instance" label. | ||||||
|  |     - action: replace | ||||||
|  |       regex: (.*) | ||||||
|  |       replacement: $1 | ||||||
|  |       sourceLabels: | ||||||
|  |       - __meta_kubernetes_pod_label_app_kubernetes_io_instance | ||||||
|  |       targetLabel: instance | ||||||
|  |     scheme: http | ||||||
|  |     # tlsConfig: | ||||||
|  |     #   insecureSkipVerify: true | ||||||
|  |   # Copy the Service's "cluster" label onto the scraped series. | ||||||
|  |   targetLabels: | ||||||
|  |     - cluster | ||||||
|  |   jobLabel: app.kubernetes.io/name | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/component: graphing | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: grafana | ||||||
|  |       app.kubernetes.io/part-of: {{ $.Chart.Name }} | ||||||
							
								
								
									
										87
									
								
								templates/ServiceMonitor-Kubelet.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								templates/ServiceMonitor-Kubelet.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,87 @@ | |||||||
|  | --- | ||||||
|  | # ServiceMonitor "kubelet": scrapes the "https-metrics" port of Services | ||||||
|  | # labelled app.kubernetes.io/name=kubelet in the kube-system namespace. | ||||||
|  | # Two endpoints are defined: one without an explicit path and one for | ||||||
|  | # /metrics/probes. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: kubelet | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |   name: kubelet | ||||||
|  |   namespace: monitoring | ||||||
|  | spec: | ||||||
|  |   endpoints: | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     honorLabels: true | ||||||
|  |     interval: 30s | ||||||
|  |     # Each rule below drops every sample whose metric name matches the regex. | ||||||
|  |     metricRelabelings: | ||||||
|  |     - action: drop | ||||||
|  |       regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs|longrunning_gauge|registered_watchers) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: transformation_(transformation_latencies_microseconds|failures_total) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     # The regex below must stay on a single line; a line break inside it | ||||||
|  |     # would split a metric name. | ||||||
|  |     - action: drop | ||||||
|  |       regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     port: https-metrics | ||||||
|  |     relabelings: | ||||||
|  |     # Expose the scrape path as the "metrics_path" label. | ||||||
|  |     - sourceLabels: | ||||||
|  |       - __metrics_path__ | ||||||
|  |       targetLabel: metrics_path | ||||||
|  |     # Use the endpoint address target name as the "instance" label. | ||||||
|  |     - action: replace | ||||||
|  |       regex: (.*) | ||||||
|  |       replacement: $1 | ||||||
|  |       sourceLabels: | ||||||
|  |       - __meta_kubernetes_endpoint_address_target_name | ||||||
|  |       targetLabel: instance | ||||||
|  |     scheme: https | ||||||
|  |     # The server certificate is not verified. | ||||||
|  |     tlsConfig: | ||||||
|  |       insecureSkipVerify: true | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     honorLabels: true | ||||||
|  |     interval: 30s | ||||||
|  |     path: /metrics/probes | ||||||
|  |     port: https-metrics | ||||||
|  |     relabelings: | ||||||
|  |     # Same relabelings as the first endpoint. | ||||||
|  |     - sourceLabels: | ||||||
|  |       - __metrics_path__ | ||||||
|  |       targetLabel: metrics_path | ||||||
|  |     - action: replace | ||||||
|  |       regex: (.*) | ||||||
|  |       replacement: $1 | ||||||
|  |       sourceLabels: | ||||||
|  |       - __meta_kubernetes_endpoint_address_target_name | ||||||
|  |       targetLabel: instance | ||||||
|  |     scheme: https | ||||||
|  |     tlsConfig: | ||||||
|  |       insecureSkipVerify: true | ||||||
|  |   # Only Services in kube-system are considered. | ||||||
|  |   namespaceSelector: | ||||||
|  |     matchNames: | ||||||
|  |     - kube-system | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/name: kubelet | ||||||
							
								
								
									
										43
									
								
								templates/ServiceMonitor-Node.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								templates/ServiceMonitor-Node.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,43 @@ | |||||||
|  | --- | ||||||
|  | # ServiceMonitor "node": scrapes /metrics on the "grafana-metrics" port of | ||||||
|  | # Services labelled as the grafana-agent exporter, every 5s over plain HTTP. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/name: node | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |     #cluster: dev | ||||||
|  |   name: node | ||||||
|  |   namespace: monitoring | ||||||
|  | spec: | ||||||
|  |   endpoints: | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     interval: 5s | ||||||
|  |     honorLabels: true | ||||||
|  |     path: /metrics | ||||||
|  |     port: grafana-metrics | ||||||
|  |     scheme: http | ||||||
|  |     relabelings: | ||||||
|  |     # Use the name of the node running the pod as the "instance" label. | ||||||
|  |     - action: replace | ||||||
|  |       regex: (.*) | ||||||
|  |       replacement: $1 | ||||||
|  |       sourceLabels: | ||||||
|  |       - __meta_kubernetes_pod_node_name | ||||||
|  |       targetLabel: instance | ||||||
|  |     # NOTE(review): job is forced to "node-exporter" although this monitor | ||||||
|  |     # is named "node" and scrapes /metrics - confirm this is intended. | ||||||
|  |     - targetLabel: "job" | ||||||
|  |       replacement: "node-exporter" | ||||||
|  |     # Also expose the node name as the "node" label. | ||||||
|  |     - action: replace | ||||||
|  |       regex: (.*) | ||||||
|  |       replacement: $1 | ||||||
|  |       sourceLabels: | ||||||
|  |       - __meta_kubernetes_pod_node_name | ||||||
|  |       targetLabel: node | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/component: exporter | ||||||
|  |       app.kubernetes.io/name: grafana-agent | ||||||
|  |       app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
							
								
								
									
										39
									
								
								templates/ServiceMonitor-ceph.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								templates/ServiceMonitor-ceph.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,39 @@ | |||||||
|  | {{- if .Values.nfc_monitoring.additions.ceph.enabled | default false }} | ||||||
|  | --- | ||||||
|  | # ServiceMonitor "ceph": rendered only when the ceph addition is enabled in | ||||||
|  | # values. Scrapes the "http-metrics" port over plain HTTP every 15s; the | ||||||
|  | # Service selector is taken verbatim from values. | ||||||
|  | # The guard sits above the document marker and does not trim to its right, | ||||||
|  | # so the marker always stays on a line of its own. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/name: ceph | ||||||
|  |     app.kubernetes.io/component: storage | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |   name: ceph | ||||||
|  |   namespace: "{{ .Values.nfc_monitoring.additions.ceph.namespace }}" | ||||||
|  | spec: | ||||||
|  |   endpoints: | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     interval: 15s | ||||||
|  |     port: http-metrics | ||||||
|  |     relabelings: | ||||||
|  |     # Copy the pod label app.kubernetes.io/part-of into the "instance" label. | ||||||
|  |     - action: replace | ||||||
|  |       regex: (.*) | ||||||
|  |       replacement: $1 | ||||||
|  |       sourceLabels: | ||||||
|  |       - __meta_kubernetes_pod_label_app_kubernetes_io_part_of | ||||||
|  |       targetLabel: instance | ||||||
|  |     # Expose the scrape path as the "metrics_path" label. | ||||||
|  |     - sourceLabels: | ||||||
|  |       - __metrics_path__ | ||||||
|  |       targetLabel: metrics_path | ||||||
|  |     # Force the job label to "ceph". | ||||||
|  |     - targetLabel: "job" | ||||||
|  |       replacement: "ceph" | ||||||
|  |     scheme: http | ||||||
|  |   # Service labels listed here are transferred onto the scraped metrics. | ||||||
|  |   targetLabels: | ||||||
|  |   - cluster | ||||||
|  |   selector: | ||||||
|  | {{- toYaml .Values.nfc_monitoring.additions.ceph.ServiceMonitor.selector | nindent 4 }} | ||||||
|  | {{- end }} | ||||||
							
								
								
									
										82
									
								
								templates/ServiceMonitor-kube-controller-manager.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								templates/ServiceMonitor-kube-controller-manager.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,82 @@ | |||||||
|  | --- | ||||||
|  | # ServiceMonitor "kube-controller-manager": scrapes /metrics over HTTPS on | ||||||
|  | # the "kube-ctrl-mgr" port of Services labelled as kube-monitor-proxy, | ||||||
|  | # every 10s. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: proxy | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: kube-controller-manager | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |     name_kcm: kube-controller-manager | ||||||
|  |   name: kube-controller-manager | ||||||
|  |   namespace: monitoring | ||||||
|  | spec: | ||||||
|  |   endpoints: | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     interval: 10s | ||||||
|  |     honorLabels: true | ||||||
|  |     path: /metrics | ||||||
|  |     port: kube-ctrl-mgr | ||||||
|  |     scheme: https | ||||||
|  |     # labels: | ||||||
|  |     #   job: kube-controller-manager | ||||||
|  |     relabelings: | ||||||
|  |     # - action: replace | ||||||
|  |     #   regex: (.*) | ||||||
|  |     #   replacement: $1 | ||||||
|  |     #   sourceLabels: | ||||||
|  |     #   - __meta_kubernetes_pod_node_name | ||||||
|  |     #   targetLabel: instance | ||||||
|  |     # Take the job name from the Service label "name_kcm". | ||||||
|  |     - action: replace | ||||||
|  |       regex: (.*) | ||||||
|  |       replacement: $1 | ||||||
|  |       sourceLabels: | ||||||
|  |       - __meta_kubernetes_service_label_name_kcm | ||||||
|  |       targetLabel: job | ||||||
|  |     # Each rule below drops every sample whose metric name matches the regex. | ||||||
|  |     metricRelabelings: | ||||||
|  |     - action: drop | ||||||
|  |       regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs|longrunning_gauge|registered_watchers) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|object_counts|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: transformation_(transformation_latencies_microseconds|failures_total) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     # The regex below must stay on a single line; a line break inside it | ||||||
|  |     # would split a metric name. | ||||||
|  |     - action: drop | ||||||
|  |       regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     - action: drop | ||||||
|  |       regex: etcd_(debugging|disk|request|server).* | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     # The server certificate is not verified. | ||||||
|  |     tlsConfig: | ||||||
|  |       insecureSkipVerify: true | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: kube-monitor-proxy | ||||||
|  |       app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
							
								
								
									
										36
									
								
								templates/ServiceMonitor-kube-scheduler.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								templates/ServiceMonitor-kube-scheduler.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,36 @@ | |||||||
|  | --- | ||||||
|  | # ServiceMonitor "kube-scheduler": scrapes /metrics over HTTPS on the | ||||||
|  | # "kube-scheduler" port of Services labelled as kube-monitor-proxy, | ||||||
|  | # every 10s. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: proxy | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: kube-scheduler | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |   name: kube-scheduler | ||||||
|  |   namespace: monitoring | ||||||
|  | spec: | ||||||
|  |   endpoints: | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     interval: 10s | ||||||
|  |     honorLabels: true | ||||||
|  |     path: /metrics | ||||||
|  |     port: kube-scheduler | ||||||
|  |     scheme: https | ||||||
|  |     relabelings: | ||||||
|  |     # Take the job name from the Service label "name_ks". | ||||||
|  |     - action: replace | ||||||
|  |       regex: (.*) | ||||||
|  |       replacement: $1 | ||||||
|  |       sourceLabels: | ||||||
|  |       - __meta_kubernetes_service_label_name_ks | ||||||
|  |       targetLabel: job | ||||||
|  |     # The server certificate is not verified. | ||||||
|  |     tlsConfig: | ||||||
|  |       insecureSkipVerify: true | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: kube-monitor-proxy | ||||||
|  |       app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
							
								
								
									
										46
									
								
								templates/ServiceMonitor-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								templates/ServiceMonitor-kubeStateMetrics.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,46 @@ | |||||||
|  | --- | ||||||
|  | # ServiceMonitor "kube-state-metrics": scrapes two HTTPS ports, | ||||||
|  | # "https-main" and "https-self", every 30s on Services matching the | ||||||
|  | # labels in spec.selector. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/instance: k8s | ||||||
|  |     app.kubernetes.io/name: kube-state-metrics | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |   name: kube-state-metrics | ||||||
|  |   namespace: monitoring | ||||||
|  | spec: | ||||||
|  |   endpoints: | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     honorLabels: true | ||||||
|  |     interval: 30s | ||||||
|  |     metricRelabelings: | ||||||
|  |     # Drop the two kube_endpoint_address_* metrics named in the regex. | ||||||
|  |     - action: drop | ||||||
|  |       regex: kube_endpoint_address_not_ready|kube_endpoint_address_available | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     port: https-main | ||||||
|  |     relabelings: | ||||||
|  |     # Remove the pod, service, endpoint and namespace target labels. | ||||||
|  |     - action: labeldrop | ||||||
|  |       regex: (pod|service|endpoint|namespace) | ||||||
|  |     scheme: https | ||||||
|  |     scrapeTimeout: 30s | ||||||
|  |     # The server certificate is not verified. | ||||||
|  |     tlsConfig: | ||||||
|  |       insecureSkipVerify: true | ||||||
|  |  | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     interval: 30s | ||||||
|  |     port: https-self | ||||||
|  |     scheme: https | ||||||
|  |     tlsConfig: | ||||||
|  |       insecureSkipVerify: true | ||||||
|  |   # The value of this Service label is used as the job name. | ||||||
|  |   jobLabel: app.kubernetes.io/name | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/component: exporter | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/name: kube-state-metrics | ||||||
|  |       app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
							
								
								
									
										44
									
								
								templates/ServiceMonitor-node-exporter.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								templates/ServiceMonitor-node-exporter.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,44 @@ | |||||||
|  | --- | ||||||
|  | # ServiceMonitor "node-exporter": scrapes | ||||||
|  | # /integrations/node_exporter/metrics on the "grafana-metrics" port of | ||||||
|  | # Services labelled as the grafana-agent exporter, every 5s over plain HTTP. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: exporter | ||||||
|  |     app.kubernetes.io/name: node-exporter | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |   name: node-exporter | ||||||
|  |   namespace: monitoring | ||||||
|  | spec: | ||||||
|  |   endpoints: | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     interval: 5s | ||||||
|  |     honorLabels: true | ||||||
|  |     path: /integrations/node_exporter/metrics | ||||||
|  |     port: grafana-metrics | ||||||
|  |     scheme: http | ||||||
|  |     relabelings: | ||||||
|  |     # Use the name of the node running the pod as the "instance" label. | ||||||
|  |     - action: replace | ||||||
|  |       regex: (.*) | ||||||
|  |       replacement: $1 | ||||||
|  |       sourceLabels: | ||||||
|  |       - __meta_kubernetes_pod_node_name | ||||||
|  |       targetLabel: instance | ||||||
|  |     # Force the job label to "node-exporter". | ||||||
|  |     - targetLabel: "job" | ||||||
|  |       replacement: "node-exporter" | ||||||
|  |     # Also expose the node name as the "node" label. | ||||||
|  |     - action: replace | ||||||
|  |       regex: (.*) | ||||||
|  |       replacement: $1 | ||||||
|  |       sourceLabels: | ||||||
|  |       - __meta_kubernetes_pod_node_name | ||||||
|  |       targetLabel: node | ||||||
|  |     # NOTE(review): tlsConfig is set although scheme is http - confirm | ||||||
|  |     # whether it is still needed. | ||||||
|  |     tlsConfig: | ||||||
|  |       insecureSkipVerify: true | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/instance: k8s | ||||||
|  |       app.kubernetes.io/component: exporter | ||||||
|  |       app.kubernetes.io/name: grafana-agent | ||||||
|  |       app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
							
								
								
									
										39
									
								
								templates/ServiceMonitor-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								templates/ServiceMonitor-prometheus-adapter.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,39 @@ | |||||||
|  | --- | ||||||
|  | # ServiceMonitor "prometheus-adapter": scrapes the "https" port over HTTPS | ||||||
|  | # every 30s on Services matching the labels in spec.selector. | ||||||
|  | apiVersion: monitoring.coreos.com/v1 | ||||||
|  | kind: ServiceMonitor | ||||||
|  | metadata: | ||||||
|  |   labels: | ||||||
|  |     app.kubernetes.io/component: metrics-adapter | ||||||
|  |     app.kubernetes.io/instance: main | ||||||
|  |     app.kubernetes.io/name: prometheus-adapter | ||||||
|  |     # Templated values are quoted so they always render as YAML strings. | ||||||
|  |     app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
|  |     app.kubernetes.io/managed-by: "{{ $.Release.Service }}" | ||||||
|  |     app.kubernetes.io/version: "{{ $.Chart.Version }}" | ||||||
|  |   name: prometheus-adapter | ||||||
|  |   namespace: monitoring | ||||||
|  | spec: | ||||||
|  |   endpoints: | ||||||
|  |   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token | ||||||
|  |     interval: 30s | ||||||
|  |     metricRelabelings: | ||||||
|  |     # Drop every sample whose metric name matches one of these prefixes. | ||||||
|  |     - action: drop | ||||||
|  |       regex: (apiserver_client_certificate_.*|apiserver_envelope_.*|apiserver_flowcontrol_.*|apiserver_storage_.*|apiserver_webhooks_.*|workqueue_.*) | ||||||
|  |       sourceLabels: | ||||||
|  |       - __name__ | ||||||
|  |     port: https | ||||||
|  |     scheme: https | ||||||
|  |     relabelings: | ||||||
|  |     # Copy the pod label app.kubernetes.io/instance into the "instance" label. | ||||||
|  |     - action: replace | ||||||
|  |       regex: (.*) | ||||||
|  |       replacement: $1 | ||||||
|  |       sourceLabels: | ||||||
|  |       - __meta_kubernetes_pod_label_app_kubernetes_io_instance | ||||||
|  |       targetLabel: instance | ||||||
|  |     # The server certificate is not verified. | ||||||
|  |     tlsConfig: | ||||||
|  |       insecureSkipVerify: true | ||||||
|  |   selector: | ||||||
|  |     matchLabels: | ||||||
|  |       app.kubernetes.io/component: metrics-adapter | ||||||
|  |       app.kubernetes.io/instance: main | ||||||
|  |       app.kubernetes.io/name: prometheus-adapter | ||||||
|  |       app.kubernetes.io/part-of: "{{ $.Chart.Name }}" | ||||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user