Update 2 files
- /roles/proxmox_base_setup/templates/storcli_metrics.sh.j2 - /roles/proxmox_base_setup/templates/node_exporter.service.j2
This commit is contained in:
parent
906555ab4b
commit
94d0108766
@ -18,14 +18,21 @@ ExecStart=/usr/local/bin/node_exporter \
|
|||||||
--collector.loadavg \
|
--collector.loadavg \
|
||||||
--collector.time \
|
--collector.time \
|
||||||
--collector.textfile.directory=/var/lib/node_exporter/textfile_collector \
|
--collector.textfile.directory=/var/lib/node_exporter/textfile_collector \
|
||||||
--web.listen-address=0.0.0.0:9100
|
--web.listen-address=0.0.0.0:9100 \
|
||||||
|
--web.telemetry-path=/metrics
|
||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=5
|
RestartSec=5
|
||||||
# Защита от атак и случайных ошибок
|
|
||||||
|
# Безопасность (Proxmox-совместимо)
|
||||||
NoNewPrivileges=yes
|
NoNewPrivileges=yes
|
||||||
ProtectSystem=strict
|
ProtectSystem=strict
|
||||||
ProtectHome=yes
|
ProtectHome=yes
|
||||||
PrivateTmp=yes
|
PrivateTmp=yes
|
||||||
|
ProtectControlGroups=yes
|
||||||
|
ProtectKernelModules=yes
|
||||||
|
ProtectKernelTunables=yes
|
||||||
|
LockPersonality=yes
|
||||||
|
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
@ -1,23 +1,91 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
if (enc != "" && slot != "") {
|
# Сбор метрик MegaRAID через storcli 1.17.08 → textfile_collector
|
||||||
# Вывести предыдущий диск
|
# Совместим с Proxmox VE 9.0
|
||||||
gsub(/ /, "_", model);
|
# Версия: 2025-11-14
|
||||||
printf "storcli_disk_temp{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, (temp ~ /^[0-9]+$/ ? temp : 0)
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
OUT_FILE="/var/lib/node_exporter/textfile_collector/storcli.prom.$$"
|
||||||
|
FINAL_FILE="/var/lib/node_exporter/textfile_collector/storcli.prom"
|
||||||
|
|
||||||
|
trap 'rm -f "$OUT_FILE"' EXIT
|
||||||
|
|
||||||
|
{
|
||||||
|
echo "# HELP storcli_disk_temp Temperature of physical disk in Celsius"
|
||||||
|
echo "# TYPE storcli_disk_temp gauge"
|
||||||
|
echo "# HELP storcli_disk_state Disk state: 0=Offline,1=UGood,2=Online,3=Failed,4=Rebuild,5=... (see docs)"
|
||||||
|
echo "# TYPE storcli_disk_state gauge"
|
||||||
|
echo "# HELP storcli_disk_media_error_count Media error count"
|
||||||
|
echo "# TYPE storcli_disk_media_error_count counter"
|
||||||
|
echo "# HELP storcli_disk_other_error_count Other error count"
|
||||||
|
echo "# TYPE storcli_disk_other_error_count counter"
|
||||||
|
echo "# HELP storcli_array_state Virtual drive state: 0=Offline,1=Online,2=Degraded,3=Failed"
|
||||||
|
echo "# TYPE storcli_array_state gauge"
|
||||||
|
echo "# HELP storcli_controller_temp Controller temperature (°C), if available"
|
||||||
|
echo "# TYPE storcli_controller_temp gauge"
|
||||||
|
echo "# HELP storcli_controller_bbu_state BBU state: 0=Missing/Failed,1=Good,2=Degraded"
|
||||||
|
echo "# TYPE storcli_controller_bbu_state gauge"
|
||||||
|
} > "$OUT_FILE"
|
||||||
|
|
||||||
|
# Получаем список контроллеров (в storcli 1.17 — /call show краткий)
|
||||||
|
# Collect the numeric controller indices reported by `storcli /call show`.
# awk skips blank lines and prints the second field of every line matching
# "Controller <number>"; grep keeps only purely numeric values.
# When nothing matches, grep exits 1; with `set -euo pipefail` that would abort
# the script on this assignment, before the empty-list check below can run.
# `|| true` keeps the assignment successful and leaves $controllers empty.
controllers=$(storcli /call show | awk '
/^$/ { next }
/Controller [0-9]+/ { print $2 }
' | grep -E '^[0-9]+$' || true)
|
||||||
|
|
||||||
|
if [ -z "$controllers" ]; then
|
||||||
|
echo "# No controllers found" >&2
|
||||||
|
touch "$FINAL_FILE"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
for c in $controllers; do
|
||||||
|
# === Контроллер: температура и BBU ===
|
||||||
|
ctl_info=$(storcli /c$c show J)
|
||||||
|
# В 1.17.08 JSON-режим ограничен, но /c0 show J даёт базовые поля
|
||||||
|
# Температура может быть в строке: "Controller Temperature (C): 58"
|
||||||
|
# Extract the first number from the "Controller Temperature" line, if any.
# The metric is optional: when the line is absent grep exits 1, which under
# `set -euo pipefail` would abort the whole script. `|| true` keeps going and
# leaves ctl_temp empty so the -n check below simply skips the metric.
ctl_temp=$(echo "$ctl_info" | grep -i "Controller Temperature" | grep -oE '[0-9]+' | head -1 || true)
|
||||||
|
if [ -n "$ctl_temp" ]; then
|
||||||
|
echo "storcli_controller_temp{controller=\"${c}\"} ${ctl_temp}" >> "$OUT_FILE"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# BBU state: ищем "BBU Status: Optimal" / "Failed" / "Missing"
|
||||||
|
# Grab the first line mentioning a BBU status, if the controller reports one.
# With no such line grep exits 1 and `set -euo pipefail` would terminate the
# script; `|| true` leaves bbu_line empty so bbu_state stays at its default 0.
bbu_line=$(echo "$ctl_info" | grep -i "BBU.*Status" | head -1 || true)
|
||||||
|
bbu_state=0
|
||||||
|
if [[ "$bbu_line" =~ Optimal|Good ]]; then
|
||||||
|
bbu_state=1
|
||||||
|
elif [[ "$bbu_line" =~ Degraded|Weak ]]; then
|
||||||
|
bbu_state=2
|
||||||
|
fi
|
||||||
|
echo "storcli_controller_bbu_state{controller=\"${c}\"} ${bbu_state}" >> "$OUT_FILE"
|
||||||
|
|
||||||
|
# === Физические диски ===
|
||||||
|
# В 1.17.08 /c0/eall/sall show даёт таблицу, но без заголовков — парсим по блокам
|
||||||
|
storcli /c$c/eall/sall show | awk -v c="$c" '
|
||||||
|
BEGIN {
|
||||||
|
enc = -1; slot = -1; model = "Unknown"; temp = 0; state_num = 0;
|
||||||
|
media_err = 0; other_err = 0; in_pd = 0
|
||||||
|
}
|
||||||
|
/^PD:/ {
|
||||||
|
if (enc != -1 && slot != -1) {
|
||||||
|
gsub(/ /, "_", model);
|
||||||
|
printf "storcli_disk_temp{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, temp
|
||||||
printf "storcli_disk_state{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, state_num
|
printf "storcli_disk_state{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, state_num
|
||||||
printf "storcli_disk_media_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, media_err
|
printf "storcli_disk_media_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, media_err
|
||||||
printf "storcli_disk_other_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, other_err
|
printf "storcli_disk_other_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, other_err
|
||||||
}
|
}
|
||||||
enc=""; slot=""; model=""; temp=""; media_err=0; other_err=0; state_num=0
|
enc = -1; slot = -1; model = "Unknown"; temp = 0; state_num = 0;
|
||||||
|
media_err = 0; other_err = 0; in_pd = 1
|
||||||
}
|
}
|
||||||
/Enclosure Device ID:/ { enc = $4 }
|
in_pd && /Enclosure Device ID:/ { enc = $4 }
|
||||||
/Slot Number:/ { slot = $3 }
|
in_pd && /Slot Number:/ { slot = $3 }
|
||||||
/Device Id:/ { if (enc == "") enc = $3 } # fallback
|
in_pd && /Device Id:/ { if (enc == -1) enc = $3 } # fallback
|
||||||
/Model Number:/ { model = $3 }
|
in_pd && /Model Number:/ { model = $3 }
|
||||||
/Drive Temperature/ {
|
in_pd && /Drive Temperature.*C/ {
|
||||||
match($0, /([0-9]+)C/);
|
match($0, /([0-9]+)C/);
|
||||||
if (RSTART) temp = substr($0, RSTART, RLENGTH-1)
|
if (RSTART) temp = substr($0, RSTART, RLENGTH-1)
|
||||||
}
|
}
|
||||||
/Firmware state:/ {
|
in_pd && /Firmware state:/ {
|
||||||
state = $3
|
state = $3
|
||||||
if (state ~ /Online/) state_num = 2
|
if (state ~ /Online/) state_num = 2
|
||||||
else if (state ~ /Unconfigured.*Good/) state_num = 1
|
else if (state ~ /Unconfigured.*Good/) state_num = 1
|
||||||
@ -26,39 +94,59 @@
|
|||||||
else if (state ~ /Offline/) state_num = 0
|
else if (state ~ /Offline/) state_num = 0
|
||||||
else state_num = -1
|
else state_num = -1
|
||||||
}
|
}
|
||||||
/Media Error Count:/ { media_err = $4 }
|
in_pd && /Media Error Count:/ { media_err = $4 }
|
||||||
/Other Error Count:/ { other_err = $4 }
|
in_pd && /Other Error Count:/ { other_err = $4 }
|
||||||
END {
|
END {
|
||||||
if (enc != "" && slot != "") {
|
if (enc != -1 && slot != -1) {
|
||||||
gsub(/ /, "_", model);
|
gsub(/ /, "_", model);
|
||||||
printf "storcli_disk_temp{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, (temp ~ /^[0-9]+$/ ? temp : 0)
|
printf "storcli_disk_temp{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, temp
|
||||||
printf "storcli_disk_state{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, state_num
|
printf "storcli_disk_state{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, state_num
|
||||||
printf "storcli_disk_media_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, media_err
|
printf "storcli_disk_media_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, media_err
|
||||||
printf "storcli_disk_other_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, other_err
|
printf "storcli_disk_other_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, other_err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
' c="$c")
|
' >> "$OUT_FILE"
|
||||||
|
|
||||||
echo "$disks_info" >> "$OUT_FILE"
|
# === Виртуальные диски (массивы) ===
|
||||||
|
storcli /c$c/vall show J | jq -r --arg c "$c" '
|
||||||
# Виртуальные диски (массивы)
|
.Controllers[0].Response.Data |
|
||||||
vds=$(storcli /c$c/vall show | awk '
|
to_entries[] |
|
||||||
NR>8 && $1 ~ /^[0-9]+$/ {
|
select(.key | test("VD[0-9]+")) |
|
||||||
vd = $1; state = $2; size = $3;
|
.value[] |
|
||||||
gsub(/,/,"", size);
|
.["DG/VD"] as $dgvd |
|
||||||
# state: Optl=Online(1), Dgrd=Degraded(2), Offln=Offline(0), Ft=Failed(3)
|
.State as $state |
|
||||||
state_num = 1;
|
.Size as $size |
|
||||||
if (state == "Dgrd") state_num = 2;
|
($dgvd | split("/")[1]) as $vd |
|
||||||
else if (state ~ /Offln|Pdgd/) state_num = 0;
|
($state == "Optl" or $state == "Onln") as $is_online |
|
||||||
else if (state == "Ft") state_num = 3;
|
($state == "Dgrd") as $is_degraded |
|
||||||
printf "storcli_array_state{controller=\"%s\",vd=\"%s\",size=\"%s\"} %s\n", c, vd, size, state_num
|
($state == "Offln" or $state == "Pdgd") as $is_offline |
|
||||||
}
|
($state == "Ft") as $is_failed |
|
||||||
')
|
(if $is_online then 1
|
||||||
echo "$vds" >> "$OUT_FILE"
|
elif $is_degraded then 2
|
||||||
|
elif $is_offline then 0
|
||||||
|
elif $is_failed then 3
|
||||||
|
else -1 end) as $state_num |
|
||||||
|
"storcli_array_state{controller=\"\($c)\",vd=\"\($vd)\",size=\"\($size)\"} \($state_num)"
|
||||||
|
' 2>/dev/null >> "$OUT_FILE" || {
|
||||||
|
# fallback без jq (если не установлен)
|
||||||
|
storcli /c$c/vall show | awk -v c="$c" '
|
||||||
|
NR <= 8 { next }
|
||||||
|
$1 ~ /^[0-9]+$/ {
|
||||||
|
vd = $1
|
||||||
|
state = $2
|
||||||
|
size = $3
|
||||||
|
gsub(/,/,"",size)
|
||||||
|
state_num = 1
|
||||||
|
if (state == "Dgrd") state_num = 2
|
||||||
|
else if (state ~ /Offln|Pdgd/) state_num = 0
|
||||||
|
else if (state == "Ft") state_num = 3
|
||||||
|
printf "storcli_array_state{controller=\"%s\",vd=\"%s\",size=\"%s\"} %s\n", c, vd, size, state_num
|
||||||
|
}
|
||||||
|
' >> "$OUT_FILE"
|
||||||
|
}
|
||||||
done
|
done
|
||||||
|
|
||||||
# Атомарная замена
|
|
||||||
mv "$OUT_FILE" "$FINAL_FILE"
|
mv "$OUT_FILE" "$FINAL_FILE"
|
||||||
chmod 644 "$FINAL_FILE"
|
chmod 644 "$FINAL_FILE"
|
||||||
|
|
||||||
echo "$(date -Iseconds) storcli metrics updated" >&2
|
echo "$(date -Iseconds) storcli metrics updated successfully" >&2
|
||||||
Loading…
Reference in New Issue
Block a user