fix(ceph): PromRule CephPGImbalance adjusted to group by node

Balancing is done per host (by hostname), not per OSD.

!1
This commit is contained in:
2023-09-23 23:11:38 +09:30
parent 4198ac78da
commit 944d615331

View File

@ -257,10 +257,25 @@ spec:
description: "OSD {{ `{{` }} $labels.ceph_daemon }} on {{ `{{` }} $labels.hostname }} deviates by more than 30% from average PG count."
summary: "PGs are not balanced across OSDs"
expr: |
abs(
((ceph_osd_numpg > 0) - on (job) group_left avg(ceph_osd_numpg > 0) by (job)) /
on (job) group_left avg(ceph_osd_numpg > 0) by (job)
) * on (ceph_daemon) group_left(hostname) ceph_osd_metadata > 0.30
# abs(
# ((ceph_osd_numpg > 0) - on (job) group_left avg(ceph_osd_numpg > 0) by (job)) /
# on (job) group_left avg(ceph_osd_numpg > 0) by (hostname)
# ) * on (ceph_daemon) group_left(hostname) ceph_osd_metadata > 0.30
(sum by(hostname) ((ceph_osd_numpg > 0) * on(ceph_daemon) group_left(hostname) ceph_osd_metadata))
<
scalar(
(avg by (job) (sum by(hostname) ((ceph_osd_numpg > 0) * on(ceph_daemon) group_left(hostname) ceph_osd_metadata))
-
(avg by (job) (sum by(hostname) ((ceph_osd_numpg > 0) * on(ceph_daemon) group_left(hostname) ceph_osd_metadata)) * 0.3 ))
)
or
(sum by(hostname) ((ceph_osd_numpg > 0) * on(ceph_daemon) group_left(hostname) ceph_osd_metadata))
>
scalar(
(avg by (job) (sum by(hostname) ((ceph_osd_numpg > 0) * on(ceph_daemon) group_left(hostname) ceph_osd_metadata))
+
(avg by (job) (sum by(hostname) ((ceph_osd_numpg > 0) * on(ceph_daemon) group_left(hostname) ceph_osd_metadata)) * 0.3 ))
)
for: "5m"
labels:
oid: "1.3.6.1.4.1.50495.1.2.1.4.5"