#!/bin/bash
# Collects MegaRAID metrics through storcli (written against 1.17.08) and
# publishes them for the node_exporter textfile collector.
# Compatible with Proxmox VE 9.0
# Version: 2025-11-14
#
# Output: metrics are written to a temporary file next to the final one and
# renamed into place at the end, so the published file is replaced in a
# single step.
# Requires: storcli, awk, grep; jq is optional (a text-table parser is used
# for virtual drives when jq is missing or yields nothing).

set -euo pipefail

OUT_FILE="/var/lib/node_exporter/textfile_collector/storcli.prom.$$"
FINAL_FILE="/var/lib/node_exporter/textfile_collector/storcli.prom"

# Remove the temporary file on every exit path (after a successful mv this
# is a no-op because the file no longer exists under that name).
trap 'rm -f "$OUT_FILE"' EXIT

#######################################
# Prints the HELP/TYPE preamble for every metric family this script emits.
# Outputs: Prometheus exposition comments on stdout.
#######################################
write_headers() {
  cat <<'EOF'
# HELP storcli_disk_temp Temperature of physical disk in Celsius
# TYPE storcli_disk_temp gauge
# HELP storcli_disk_state Disk state: 0=Offline,1=UGood,2=Online,3=Failed,4=Rebuild,5=... (see docs)
# TYPE storcli_disk_state gauge
# HELP storcli_disk_media_error_count Media error count
# TYPE storcli_disk_media_error_count counter
# HELP storcli_disk_other_error_count Other error count
# TYPE storcli_disk_other_error_count counter
# HELP storcli_array_state Virtual drive state: 0=Offline,1=Online,2=Degraded,3=Failed
# TYPE storcli_array_state gauge
# HELP storcli_controller_temp Controller temperature (°C), if available
# TYPE storcli_controller_temp gauge
# HELP storcli_controller_bbu_state BBU state: 0=Missing/Failed,1=Good,2=Degraded
# TYPE storcli_controller_bbu_state gauge
EOF
}

#######################################
# Extracts controller indexes from `storcli /call show` output.
# Accepts both "Controller N" and "Controller = N" lines and prints each
# index once, in order of first appearance. Always exits 0, so "nothing
# found" is reported as empty output instead of a pipeline failure.
# Inputs:  storcli output on stdin.
# Outputs: one controller index per line on stdout.
#######################################
parse_controller_ids() {
  awk '
    match($0, /Controller( =)? [0-9]+/) {
      id = substr($0, RSTART, RLENGTH)
      sub(/^[^0-9]+/, "", id)
      if (!(id in seen)) { seen[id] = 1; print id }
    }
  '
}

#######################################
# Emits controller temperature (when present) and BBU state.
# Arguments: $1 - controller index.
# Outputs:   metric lines on stdout.
#######################################
emit_controller_metrics() {
  local c=$1
  local ctl_info ctl_temp bbu_line
  local bbu_state=0

  # A storcli failure here still aborts the script through set -e.
  ctl_info=$(storcli "/c${c}" show J)

  # The temperature may appear as a line such as
  # "Controller Temperature (C): 58". It is optional: "|| true" keeps a grep
  # miss (exit 1 under pipefail) from terminating the whole run.
  ctl_temp=$(grep -i "Controller Temperature" <<<"$ctl_info" \
    | grep -oE '[0-9]+' \
    | head -1 || true)
  if [[ -n "$ctl_temp" ]]; then
    printf 'storcli_controller_temp{controller="%s"} %s\n' "$c" "$ctl_temp"
  fi

  # BBU state: look for "BBU Status: Optimal" / "Failed" / "Missing".
  # A missing line leaves the state at 0 (Missing/Failed).
  bbu_line=$(grep -i "BBU.*Status" <<<"$ctl_info" | head -1 || true)
  if [[ "$bbu_line" =~ Optimal|Good ]]; then
    bbu_state=1
  elif [[ "$bbu_line" =~ Degraded|Weak ]]; then
    bbu_state=2
  fi
  printf 'storcli_controller_bbu_state{controller="%s"} %s\n' "$c" "$bbu_state"
}

#######################################
# Converts per-disk "PD:" blocks into disk metrics.
# A block is emitted only once both enclosure and slot are known.
# NOTE(review): this parser expects "PD:" blocks with "Enclosure Device ID:",
# "Slot Number:", "Firmware state:" ... lines - confirm that
# `storcli /cN/eall/sall show` really prints that layout on the target
# version.
# Arguments: $1 - controller index.
# Inputs:    storcli output on stdin.
# Outputs:   metric lines on stdout.
#######################################
parse_physical_disks() {
  awk -v c="$1" '
    function reset_disk() {
      enc = -1; slot = -1; model = "Unknown"
      temp = 0; state_num = 0; media_err = 0; other_err = 0
    }
    # Prints the four metric lines of the current disk, if it is complete.
    function emit_disk(    labels) {
      if (enc == -1 || slot == -1) return
      # Spaces become "_"; quotes and backslashes would break the label
      # syntax of the exposition format, so they are replaced as well.
      gsub(/[ "\\]/, "_", model)
      labels = sprintf("controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"", c, enc, slot, model)
      printf "storcli_disk_temp{%s} %s\n", labels, temp
      printf "storcli_disk_state{%s} %s\n", labels, state_num
      printf "storcli_disk_media_error_count{%s} %s\n", labels, media_err
      printf "storcli_disk_other_error_count{%s} %s\n", labels, other_err
    }
    BEGIN { reset_disk(); in_pd = 0 }
    /^PD:/ { emit_disk(); reset_disk(); in_pd = 1 }
    in_pd && /Enclosure Device ID:/ { enc = $4 }
    in_pd && /Slot Number:/         { slot = $3 }
    # Fallback when no enclosure id has been seen for this disk.
    in_pd && /Device Id:/           { if (enc == -1) enc = $3 }
    in_pd && /Model Number:/ {
      # Keep the complete value after the colon, not just its first word.
      model = $0
      sub(/^[^:]*:[ \t]*/, "", model)
      sub(/[ \t]+$/, "", model)
      if (model == "") model = "Unknown"
    }
    in_pd && /Drive Temperature.*C/ {
      # Take the digits directly in front of the first "C".
      if (match($0, /[0-9]+C/)) temp = substr($0, RSTART, RLENGTH - 1)
    }
    in_pd && /Firmware state:/ {
      # Compared in lower case so that "Unconfigured(good)" is recognised.
      state = tolower($3)
      if      (state ~ /online/)             state_num = 2
      else if (state ~ /unconfigured.*good/) state_num = 1
      else if (state ~ /failed/)             state_num = 3
      else if (state ~ /rebuild/)            state_num = 4
      else if (state ~ /offline/)            state_num = 0
      else                                   state_num = -1
    }
    in_pd && /Media Error Count:/ { media_err = $4 }
    in_pd && /Other Error Count:/ { other_err = $4 }
    END { emit_disk() }
  '
}

#######################################
# Converts `storcli /cN/vall show J` JSON into storcli_array_state lines.
# Reads the payload from the "Response Data" key (the former
# .Response.Data path is still tried as a fallback) and takes every array
# entry that is an object carrying a "DG/VD" field.
# State mapping: Optl/Onln=1, Dgrd=2, Offln/Pdgd=0, Ft=3, anything else=-1.
# Arguments: $1 - controller index.
# Inputs:    JSON on stdin.
# Outputs:   metric lines on stdout; non-zero exit when jq cannot parse.
#######################################
vd_metrics_from_json() {
  jq -r --arg c "$1" '
    (.Controllers[0] | (.["Response Data"] // .Response.Data // {}))
    | to_entries[]
    | select(.value | type == "array")
    | .value[]
    | select(type == "object" and has("DG/VD"))
    | (.["DG/VD"] | tostring | split("/") | .[1] // .[0]) as $vd
    | (if   .State == "Optl"  or .State == "Onln" then 1
       elif .State == "Dgrd"                      then 2
       elif .State == "Offln" or .State == "Pdgd" then 0
       elif .State == "Ft"                        then 3
       else -1 end) as $state_num
    | "storcli_array_state{controller=\"\($c)\",vd=\"\($vd)\",size=\"\(.Size)\"} \($state_num)"
  '
}

#######################################
# Converts the plain-text `storcli /cN/vall show` table into
# storcli_array_state lines, using the same fields and state mapping as the
# JSON parser: rows are recognised by a "DG/VD" first column (e.g. "0/0"),
# the state is the third column, and the size is the first
# "<number> <KB|MB|GB|TB|PB>" pair found after it.
# Arguments: $1 - controller index.
# Inputs:    storcli output on stdin.
# Outputs:   metric lines on stdout.
#######################################
vd_metrics_from_text() {
  awk -v c="$1" '
    function state_to_num(s) {
      if (s == "Optl" || s == "Onln")  return 1
      if (s == "Dgrd")                 return 2
      if (s == "Offln" || s == "Pdgd") return 0
      if (s == "Ft")                   return 3
      return -1
    }
    $1 ~ /^[0-9]+\/[0-9]+$/ {
      split($1, ids, "/")
      size = ""
      for (i = 4; i <= NF; i++) {
        if ($i ~ /^[KMGTP]B$/) { size = $(i - 1) " " $i; break }
      }
      printf "storcli_array_state{controller=\"%s\",vd=\"%s\",size=\"%s\"} %s\n", c, ids[2], size, state_to_num($3)
    }
  '
}

#######################################
# Emits virtual-drive (array) metrics for one controller.
# The JSON path is tried first when jq is installed; its output is buffered
# so a jq failure can never leave partial lines behind. If it produces
# nothing, the text-table parser is used instead.
# Arguments: $1 - controller index.
# Outputs:   metric lines on stdout.
#######################################
emit_vd_metrics() {
  local c=$1
  local vd_lines=""

  if command -v jq >/dev/null 2>&1; then
    # Errors are deliberately silenced here: any failure simply selects the
    # text fallback below.
    vd_lines=$(storcli "/c${c}/vall" show J \
      | vd_metrics_from_json "$c" 2>/dev/null || true)
  fi

  if [[ -z "$vd_lines" ]]; then
    vd_lines=$(storcli "/c${c}/vall" show | vd_metrics_from_text "$c")
  fi

  if [[ -n "$vd_lines" ]]; then
    printf '%s\n' "$vd_lines"
  fi
}

main() {
  local call_out c
  local -a controllers=()

  write_headers > "$OUT_FILE"

  # Controller list (in storcli 1.17 `/call show` is the brief form).
  # storcli is run on its own so that its failure still aborts the script,
  # while an empty parse result reaches the check below.
  call_out=$(storcli /call show)
  mapfile -t controllers < <(parse_controller_ids <<<"$call_out")

  if (( ${#controllers[@]} == 0 )); then
    echo "# No controllers found" >&2
    touch "$FINAL_FILE"
    exit 0
  fi

  for c in "${controllers[@]}"; do
    {
      # === Controller: temperature and BBU ===
      emit_controller_metrics "$c"
      # === Physical disks ===
      storcli "/c${c}/eall/sall" show | parse_physical_disks "$c"
      # === Virtual drives (arrays) ===
      emit_vd_metrics "$c"
    } >> "$OUT_FILE"
  done

  # Set the permissions before publishing so the final file is never
  # visible with the wrong mode.
  chmod 644 "$OUT_FILE"
  mv "$OUT_FILE" "$FINAL_FILE"

  echo "$(date -Iseconds) storcli metrics updated successfully" >&2
}

main "$@"