- /roles/proxmox_monitoring/handlers/main.yml - /roles/proxmox_monitoring/tasks/main.yml - /roles/proxmox_monitoring/templates/node_exporter.service.j2 - /roles/proxmox_monitoring/templates/storcli_metrics.sh.j2 - /roles/proxmox_monitoring/templates/pve_exporter_config.yml.j2 - /roles/proxmox_base_setup/tasks/main.yml - /roles/grafana/templates/docker-compose.yml.j2 - /roles/grafana/files/vmagent.yaml - /roles/base_setup/tasks/main.yml - /roles/base_setup/handlers/main.yml - /group_vars/all.yml - /olimp-deploy.yml
79 lines
3.5 KiB
Django/Jinja
79 lines
3.5 KiB
Django/Jinja
#!/bin/bash
#
# Export MegaRAID health data (queried through storcli) as Prometheus metrics
# for the node_exporter textfile collector.
#
# Metrics written to OUT_FILE:
#   storcli_disk_temp               drive temperature in degrees Celsius
#   storcli_disk_state              0=Offline 1=UGood 2=Online 3=Failed 4=Rebuild
#   storcli_disk_media_error_count  "Media Error Count" reported for the drive
#   storcli_disk_other_error_count  "Other Error Count" reported for the drive
#   storcli_array_state             0=Offline 1=Online 2=Degraded 3=Failed
#
# The metrics are assembled in a temporary file inside the target directory
# and renamed into place, so the collector never reads a half-written file.
# Progress messages go to stdout, warnings to stderr.
#
# Exit status: 0 when the metrics file was published; non-zero when storcli
# is not executable or the file could not be written.
#
# NOTE(review): this script appears to be deployed as a Jinja template, so
# the code deliberately avoids brace sequences Jinja would treat as
# delimiters - keep it that way when editing.

set -u

readonly STORCLI_BIN="/opt/MegaRAID/storcli/storcli64"
readonly OUT_FILE="/var/lib/node_exporter/textfile_collector/storcli.prom"
readonly CONTROLLER="0"
readonly ENCLOSURE="252"
readonly VD_ROW_ID="0/0"      # DG/VD identifier of the row in "vall show"
readonly VIRTUAL_DRIVE="0"    # value of the vd label
readonly DISK_MODEL="ST8000NM0075"
readonly -a DISK_SLOTS=(0 1 2 3)

# Print a warning on stderr.
warn() {
  printf 'storcli_metrics: %s\n' "$*" >&2
}

# Run storcli with the given arguments (single place to change the binary).
storcli() {
  "$STORCLI_BIN" "$@"
}

# Print one metric family: HELP and TYPE lines followed by its samples.
# Arguments: $1 name, $2 type, $3 help text, $4 newline-terminated samples
#            (may be empty).
emit_family() {
  local name=$1 type=$2 help=$3 samples=$4
  printf '# HELP %s %s\n' "$name" "$help"
  printf '# TYPE %s %s\n' "$name" "$type"
  printf '%s' "$samples"
}

# Convert the drive table of "storcli /cX/eall/sall show all" (stdin) into
# storcli_disk_state samples (stdout), one per row of ENCLOSURE.
# States not listed below keep the historical default of 2 (Online).
format_disk_states() {
  awk -v ctl="$CONTROLLER" -v enc="$ENCLOSURE" -v model="$DISK_MODEL" '
    # [ \t]+ instead of \s+: \s is a GNU awk extension and does not match
    # whitespace in other awk implementations.
    BEGIN { row_re = "^" enc ":[0-9]+[ \t]+" }
    $0 ~ row_re {
      split($1, id, ":")
      state = $3
      if (state == "Offln") num = 0
      else if (state == "UGood") num = 1
      # UBad (unconfigured bad) is reported as failed rather than Online.
      else if (state == "Failed" || state == "UBad") num = 3
      # storcli abbreviates the rebuild state as Rbld.
      else if (state == "Rbld" || state == "Rebuild") num = 4
      else num = 2
      printf "storcli_disk_state{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",did=\"%s\",model=\"%s\"} %d\n", ctl, enc, id[2], $2, model, num
    }
  '
}

# Print the first integer found on the "Drive Temperature" line of the
# per-drive "show all" output read from stdin; prints nothing when absent.
extract_temperature() {
  grep "Drive Temperature" | grep -oE '[0-9]+' | head -1
}

# Print the unsigned integer following "=" on the first stdin line that
# contains label $1. Prints 0 when the line is missing or not numeric, so
# the resulting sample is always well-formed.
extract_count() {
  local value
  value=$(awk -F= -v label="$1" \
    'index($0, label) { split($2, val, " "); print val[1]; exit }')
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$value"
  else
    printf '0\n'
  fi
}

# Map a storcli virtual-drive state to the storcli_array_state value.
# Returns non-zero (printing nothing) for an empty state, so a failed query
# is never exported as "Online". Unlisted states keep the historical
# default of 1.
array_state_to_num() {
  case "$1" in
    '') return 1 ;;
    OfLn | Offln) printf '0\n' ;;  # storcli prints OfLn for virtual drives
    Dgrd | Pdgd) printf '2\n' ;;   # partially degraded is still degraded
    Ft) printf '3\n' ;;
    *) printf '1\n' ;;
  esac
}

# Query storcli and atomically publish the metrics file.
# Arguments: $1 - path of the .prom file to (re)write
# Returns:   0 when the file was published, 1 when it could not be written.
collect_metrics() {
  local out_file=$1
  local tmp_file all_disks disk_info vd_info slot labels
  local temp media_errors other_errors state_num
  local array_state=""
  local state_samples="" temp_samples="" media_samples=""
  local other_samples="" array_sample=""

  # Same directory as the target so the final mv is an atomic rename; the
  # suffix keeps the temporary file from looking like a .prom file.
  tmp_file=$(mktemp "${out_file}.XXXXXX") || {
    warn "cannot create a temporary file next to ${out_file}"
    return 1
  }
  # Expand the path now: the local variable no longer exists at exit time.
  # shellcheck disable=SC2064
  trap "rm -f -- $(printf '%q' "$tmp_file")" EXIT

  # Disk states from the summary table.
  if all_disks=$(storcli "/c${CONTROLLER}/eall/sall" show all); then
    state_samples=$(format_disk_states <<<"$all_disks")
    if [[ -n "$state_samples" ]]; then
      state_samples+=$'\n'
    fi
  else
    warn "storcli could not list the drives; storcli_disk_state not exported"
  fi

  # Temperature and error counters for every disk.
  for slot in "${DISK_SLOTS[@]}"; do
    echo "Processing disk slot $slot..."
    # Skip the slot when the query fails: exporting 0 for a counter that
    # could not be read would look like a counter reset.
    if ! disk_info=$(storcli "/c${CONTROLLER}/e${ENCLOSURE}/s${slot}" show all) \
      || [[ -z "$disk_info" ]]; then
      warn "no data for slot ${slot}; skipping it"
      continue
    fi
    labels="controller=\"${CONTROLLER}\",enclosure=\"${ENCLOSURE}\",slot=\"${slot}\",model=\"${DISK_MODEL}\""

    temp=$(extract_temperature <<<"$disk_info")
    if [[ -n "$temp" ]]; then
      temp_samples+="storcli_disk_temp{$labels} $temp"$'\n'
      echo " Temperature: $temp°C"
    fi

    media_errors=$(extract_count "Media Error Count" <<<"$disk_info")
    other_errors=$(extract_count "Other Error Count" <<<"$disk_info")
    media_samples+="storcli_disk_media_error_count{$labels} $media_errors"$'\n'
    other_samples+="storcli_disk_other_error_count{$labels} $other_errors"$'\n'
    echo " Media errors: $media_errors, Other errors: $other_errors"
  done

  # Array (virtual drive) state: third column of the DG/VD row.
  if vd_info=$(storcli "/c${CONTROLLER}/vall" show); then
    array_state=$(awk -v id="$VD_ROW_ID" '$1 == id { print $3; exit }' \
      <<<"$vd_info")
  fi
  if state_num=$(array_state_to_num "$array_state"); then
    array_sample="storcli_array_state{controller=\"${CONTROLLER}\",vd=\"${VIRTUAL_DRIVE}\"} ${state_num}"$'\n'
    echo "Array state: $array_state ($state_num)"
  else
    warn "virtual drive ${VD_ROW_ID} not found; storcli_array_state not exported"
  fi

  # Each family is written as one contiguous group, headers first.
  {
    emit_family storcli_disk_temp gauge \
      "Temperature of physical disk in Celsius" "$temp_samples"
    emit_family storcli_disk_state gauge \
      "Disk state: 0=Offline,1=UGood,2=Online,3=Failed,4=Rebuild" \
      "$state_samples"
    emit_family storcli_disk_media_error_count counter \
      "Media error count" "$media_samples"
    emit_family storcli_disk_other_error_count counter \
      "Other error count" "$other_samples"
    emit_family storcli_array_state gauge \
      "Virtual drive state: 0=Offline,1=Online,2=Degraded,3=Failed" \
      "$array_sample"
  } >"$tmp_file" || {
    warn "cannot write ${tmp_file}"
    return 1
  }

  # mktemp creates the file with mode 0600; make it world-readable before
  # it becomes visible under its final name.
  if ! chmod 644 "$tmp_file" || ! mv -f -- "$tmp_file" "$out_file"; then
    warn "cannot publish ${out_file}"
    return 1
  fi

  echo "$(date -Iseconds) storcli metrics updated successfully"
}

# Entry point: verify storcli is available, then refresh OUT_FILE.
main() {
  if [[ ! -x "$STORCLI_BIN" ]]; then
    warn "${STORCLI_BIN} is not executable"
    return 1
  fi
  collect_metrics "$OUT_FILE"
}

main