add metrics ioerror counter for alerts on I/O errors (#19618)

This commit is contained in:
Harshavardhana
2024-04-25 15:01:31 -07:00
committed by GitHub
parent 9a3c992d7a
commit c54ffde568
5 changed files with 36 additions and 13 deletions
+16
View File
@@ -541,6 +541,16 @@ func getNodeDriveTimeoutErrorsMD() MetricDescription {
}
}
// getNodeDriveIOErrorsMD builds the description of the "errors_ioerror"
// counter metric, registered under the node namespace and the drive
// subsystem. Its help text states that it tracks the total number of drive
// I/O errors since server start.
func getNodeDriveIOErrorsMD() MetricDescription {
	var md MetricDescription

	// Where the metric lives.
	md.Namespace = nodeMetricNamespace
	md.Subsystem = driveSubsystem

	// What the metric is called and how it is presented.
	md.Name = "errors_ioerror"
	md.Help = "Total number of drive I/O errors since server start"
	md.Type = counterMetric

	return md
}
func getNodeDriveAvailabilityErrorsMD() MetricDescription {
return MetricDescription{
Namespace: nodeMetricNamespace,
@@ -3521,6 +3531,12 @@ func getLocalStorageMetrics(opts MetricsGroupOpts) *MetricsGroupV2 {
VariableLabels: map[string]string{"drive": disk.DrivePath},
})
metrics = append(metrics, MetricV2{
Description: getNodeDriveIOErrorsMD(),
Value: float64(disk.Metrics.TotalErrorsAvailability - disk.Metrics.TotalErrorsTimeout),
VariableLabels: map[string]string{"drive": disk.DrivePath},
})
metrics = append(metrics, MetricV2{
Description: getNodeDriveAvailabilityErrorsMD(),
Value: float64(disk.Metrics.TotalErrorsAvailability),