fix(ceph): PromRule CephPGImbalance adjusted to group by node
Balancing is done per hostname, not per OSD. !1
This commit is contained in:
@ -257,10 +257,25 @@ spec:
|
||||
# The expr sums PG counts per host, so ceph_daemon is not a label on the firing series; only reference hostname here.
description: "Host {{ `{{` }} $labels.hostname }} deviates by more than 30% from the average per-host PG count."
|
||||
# The rule compares per-host PG totals, not individual OSDs.
summary: "PGs are not balanced across hosts"
|
||||
expr: |
|
||||
abs(
|
||||
((ceph_osd_numpg > 0) - on (job) group_left avg(ceph_osd_numpg > 0) by (job)) /
|
||||
on (job) group_left avg(ceph_osd_numpg > 0) by (job)
|
||||
) * on (ceph_daemon) group_left(hostname) ceph_osd_metadata > 0.30
|
||||
# abs(
|
||||
# ((ceph_osd_numpg > 0) - on (job) group_left avg(ceph_osd_numpg > 0) by (job)) /
|
||||
# on (job) group_left avg(ceph_osd_numpg > 0) by (hostname)
|
||||
# ) * on (ceph_daemon) group_left(hostname) ceph_osd_metadata > 0.30
|
||||
# Fires for every host whose total PG count (summed over its OSDs that hold at least one PG)
# is more than 30% below or above the average host total of the same job.
# "job" is kept through the aggregation so that "avg by (job)" really averages per job;
# after a plain "sum by (hostname)" the job label is gone and the grouping has no effect.
# The join with ceph_osd_metadata only attaches the hostname label
# (presumably the metadata series has value 1, so the PG count is unchanged - confirm).
(
sum by (job, hostname) ((ceph_osd_numpg > 0) * on (ceph_daemon) group_left (hostname) ceph_osd_metadata)
< on (job) group_left ()
(avg by (job) (sum by (job, hostname) ((ceph_osd_numpg > 0) * on (ceph_daemon) group_left (hostname) ceph_osd_metadata)) * 0.7)
)
or
(
sum by (job, hostname) ((ceph_osd_numpg > 0) * on (ceph_daemon) group_left (hostname) ceph_osd_metadata)
> on (job) group_left ()
(avg by (job) (sum by (job, hostname) ((ceph_osd_numpg > 0) * on (ceph_daemon) group_left (hostname) ceph_osd_metadata)) * 1.3)
)
|
||||
for: "5m"
|
||||
labels:
|
||||
oid: "1.3.6.1.4.1.50495.1.2.1.4.5"
|
||||
|
Reference in New Issue
Block a user