diff --git a/templates/prometheusRule-ceph.yaml b/templates/prometheusRule-ceph.yaml
index 61c279b..e6a9a28 100644
--- a/templates/prometheusRule-ceph.yaml
+++ b/templates/prometheusRule-ceph.yaml
@@ -257,10 +257,23 @@ spec:
             description: "OSD {{ `{{` }} $labels.ceph_daemon }} on {{ `{{` }} $labels.hostname }} deviates by more than 30% from average PG count."
             summary: "PGs are not balanced across OSDs"
           expr: |
-            abs(
-              ((ceph_osd_numpg > 0) - on (job) group_left avg(ceph_osd_numpg > 0) by (job)) /
-              on (job) group_left avg(ceph_osd_numpg > 0) by (job)
-            ) * on (ceph_daemon) group_left(hostname) ceph_osd_metadata > 0.30
+            # Per-host PG imbalance: total PGs on each host compared with the mean
+            # host total of the same job. Fires when a host is more than 30% above
+            # or below that mean (abs(host / mean - 1) > 0.30).
+            # NOTE(review): the result only carries the job and hostname labels, so
+            # the ceph_daemon label used by the description annotation renders empty.
+            abs(
+              sum by (job, hostname) (
+                (ceph_osd_numpg > 0) * on (ceph_daemon) group_left (hostname) ceph_osd_metadata
+              )
+              / on (job) group_left
+              avg by (job) (
+                sum by (job, hostname) (
+                  (ceph_osd_numpg > 0) * on (ceph_daemon) group_left (hostname) ceph_osd_metadata
+                )
+              )
+              - 1
+            ) > 0.30
           for: "5m"
           labels:
             oid: "1.3.6.1.4.1.50495.1.2.1.4.5"