Update 2 files

- /roles/proxmox_base_setup/templates/storcli_metrics.sh.j2
- /roles/proxmox_base_setup/templates/node_exporter.service.j2
This commit is contained in:
Administrator 2025-11-14 07:18:41 +00:00
parent 906555ab4b
commit 94d0108766
2 changed files with 133 additions and 38 deletions

View File

@ -18,14 +18,21 @@ ExecStart=/usr/local/bin/node_exporter \
--collector.loadavg \
--collector.time \
--collector.textfile.directory=/var/lib/node_exporter/textfile_collector \
--web.listen-address=0.0.0.0:9100
--web.listen-address=0.0.0.0:9100 \
--web.telemetry-path=/metrics
# Restart policy: always restart the exporter, with a 5-second delay.
Restart=always
RestartSec=5
# Protection against attacks and accidental errors
# Security (Proxmox-compatible)
NoNewPrivileges=yes
ProtectSystem=strict
ProtectHome=yes
PrivateTmp=yes
ProtectControlGroups=yes
ProtectKernelModules=yes
ProtectKernelTunables=yes
LockPersonality=yes
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX
[Install]
WantedBy=multi-user.target

View File

@ -1,23 +1,91 @@
#!/bin/bash
# NOTE(review): the next two lines look like leftovers of removed awk code in
# this diff rendering; they are not valid at shell top level - confirm and drop.
if (enc != "" && slot != "") {
# Print the previous disk
# Collect MegaRAID metrics via storcli 1.17.08 → textfile_collector
# Compatible with Proxmox VE 9.0
# Version: 2025-11-14
# Strict mode: exit on errors, on unset variables and on any failing pipeline stage.
set -euo pipefail
# Metrics are first written to a per-process temporary file (suffix is the shell PID)...
OUT_FILE="/var/lib/node_exporter/textfile_collector/storcli.prom.$$"
# ...and later moved over this final path in the textfile collector directory.
FINAL_FILE="/var/lib/node_exporter/textfile_collector/storcli.prom"
# Remove the temporary file on every exit path; rm -f is silent if it is already gone.
trap 'rm -f "$OUT_FILE"' EXIT
# Start the temporary output file (truncating it) with the HELP/TYPE metadata
# of every metric family; one printf argument becomes one output line.
printf '%s\n' \
  "# HELP storcli_disk_temp Temperature of physical disk in Celsius" \
  "# TYPE storcli_disk_temp gauge" \
  "# HELP storcli_disk_state Disk state: 0=Offline,1=UGood,2=Online,3=Failed,4=Rebuild,5=... (see docs)" \
  "# TYPE storcli_disk_state gauge" \
  "# HELP storcli_disk_media_error_count Media error count" \
  "# TYPE storcli_disk_media_error_count counter" \
  "# HELP storcli_disk_other_error_count Other error count" \
  "# TYPE storcli_disk_other_error_count counter" \
  "# HELP storcli_array_state Virtual drive state: 0=Offline,1=Online,2=Degraded,3=Failed" \
  "# TYPE storcli_array_state gauge" \
  "# HELP storcli_controller_temp Controller temperature (°C), if available" \
  "# TYPE storcli_controller_temp gauge" \
  "# HELP storcli_controller_bbu_state BBU state: 0=Missing/Failed,1=Good,2=Degraded" \
  "# TYPE storcli_controller_bbu_state gauge" \
  > "$OUT_FILE"
# Get the list of controller indexes (in storcli 1.17, /call show is the brief form).
# For every line matching "Controller <digits>" the second field is printed,
# but only when that field is purely numeric.
# The numeric filter is done inside awk rather than by a trailing grep: awk
# exits 0 even when nothing matches, whereas grep exits 1 on an empty result,
# which under `set -euo pipefail` would abort the script before the empty-list
# check below could run. A failure of storcli itself still aborts the script.
controllers=$(storcli /call show | awk '
  /Controller [0-9]+/ && $2 ~ /^[0-9]+$/ { print $2 }
')
if [[ -z "$controllers" ]]; then
  echo "# No controllers found" >&2
  # NOTE(review): touch leaves the contents of an existing metrics file as they
  # are and only refreshes its timestamp - confirm that keeping the previously
  # published values is intended when no controller is detected.
  touch "$FINAL_FILE"
  exit 0
fi
# Per-controller collection; the loop body continues below and is closed by the
# final `done`. $controllers is intentionally unquoted: it is a
# whitespace-separated list of numeric indexes.
for c in $controllers; do
# === Controller: temperature and BBU ===
# A storcli failure here is still fatal under `set -e`.
ctl_info=$(storcli "/c${c}" show J)
# In 1.17.08 JSON mode is limited, but /c0 show J provides the basic fields.
# The temperature may appear in a line such as: "Controller Temperature (C): 58"
# The lookup is optional: `|| true` keeps a non-matching grep (exit 1), or a
# SIGPIPE caused by head, from aborting the script under `set -euo pipefail`;
# an empty result simply means the metric is skipped.
ctl_temp=$(grep -i "Controller Temperature" <<<"$ctl_info" | grep -oE '[0-9]+' | head -1 || true)
if [[ -n "$ctl_temp" ]]; then
  printf 'storcli_controller_temp{controller="%s"} %s\n' "$c" "$ctl_temp" >> "$OUT_FILE"
fi
# BBU state: look for "BBU Status: Optimal" / "Failed" / "Missing".
# Also optional: with no matching line bbu_line stays empty and the state
# remains 0, instead of the failed grep terminating the script.
bbu_line=$(grep -i "BBU.*Status" <<<"$ctl_info" | head -1 || true)
bbu_state=0
if [[ "$bbu_line" =~ Optimal|Good ]]; then
  bbu_state=1
elif [[ "$bbu_line" =~ Degraded|Weak ]]; then
  bbu_state=2
fi
printf 'storcli_controller_bbu_state{controller="%s"} %s\n' "$c" "$bbu_state" >> "$OUT_FILE"
# === Physical disks ===
# In 1.17.08, /c0/eall/sall show yields a table but without headers - parse block by block
# The awk program keeps one record of per-disk fields. A line starting with PD:
# prints the temp, state, media-error and other-error metrics of the previous
# disk (only if an enclosure and a slot were seen) and then resets the fields;
# the END rule prints the last disk the same way.
# NOTE(review): this span appears to come from a rendered diff - several rules
# are present in both their removed and added form (field rules with and
# without the in_pd guard, two storcli_disk_temp printf lines, a hunk header),
# so it is not runnable as written.
# NOTE(review): model is assigned from a single field ($3), so the gsub that
# replaces spaces in model presumably never matches - confirm.
storcli /c$c/eall/sall show | awk -v c="$c" '
BEGIN {
enc = -1; slot = -1; model = "Unknown"; temp = 0; state_num = 0;
media_err = 0; other_err = 0; in_pd = 0
}
/^PD:/ {
if (enc != -1 && slot != -1) {
gsub(/ /, "_", model);
printf "storcli_disk_temp{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, (temp ~ /^[0-9]+$/ ? temp : 0)
printf "storcli_disk_temp{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, temp
printf "storcli_disk_state{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, state_num
printf "storcli_disk_media_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, media_err
printf "storcli_disk_other_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, other_err
}
enc=""; slot=""; model=""; temp=""; media_err=0; other_err=0; state_num=0
enc = -1; slot = -1; model = "Unknown"; temp = 0; state_num = 0;
media_err = 0; other_err = 0; in_pd = 1
}
/Enclosure Device ID:/ { enc = $4 }
/Slot Number:/ { slot = $3 }
/Device Id:/ { if (enc == "") enc = $3 } # fallback
/Model Number:/ { model = $3 }
/Drive Temperature/ {
in_pd && /Enclosure Device ID:/ { enc = $4 }
in_pd && /Slot Number:/ { slot = $3 }
in_pd && /Device Id:/ { if (enc == -1) enc = $3 } # fallback
in_pd && /Model Number:/ { model = $3 }
in_pd && /Drive Temperature.*C/ {
match($0, /([0-9]+)C/);
if (RSTART) temp = substr($0, RSTART, RLENGTH-1)
}
/Firmware state:/ {
in_pd && /Firmware state:/ {
state = $3
if (state ~ /Online/) state_num = 2
else if (state ~ /Unconfigured.*Good/) state_num = 1
@ -26,39 +94,59 @@
else if (state ~ /Offline/) state_num = 0
else state_num = -1
}
/Media Error Count:/ { media_err = $4 }
/Other Error Count:/ { other_err = $4 }
in_pd && /Media Error Count:/ { media_err = $4 }
in_pd && /Other Error Count:/ { other_err = $4 }
END {
if (enc != "" && slot != "") {
if (enc != -1 && slot != -1) {
gsub(/ /, "_", model);
printf "storcli_disk_temp{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, (temp ~ /^[0-9]+$/ ? temp : 0)
printf "storcli_disk_temp{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, temp
printf "storcli_disk_state{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, state_num
printf "storcli_disk_media_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, media_err
printf "storcli_disk_other_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, other_err
}
}
' c="$c")
' >> "$OUT_FILE"
echo "$disks_info" >> "$OUT_FILE"
# Виртуальные диски (массивы)
vds=$(storcli /c$c/vall show | awk '
NR>8 && $1 ~ /^[0-9]+$/ {
vd = $1; state = $2; size = $3;
gsub(/,/,"", size);
# state: Optl=Online(1), Dgrd=Degraded(2), Offln=Offline(0), Ft=Failed(3)
state_num = 1;
if (state == "Dgrd") state_num = 2;
else if (state ~ /Offln|Pdgd/) state_num = 0;
else if (state == "Ft") state_num = 3;
# === Virtual drives (arrays) ===
# The jq filter walks the VD<n> entries of the first controller in the JSON
# response, takes the part after the slash of DG/VD as the vd label and maps
# State to a number: Optl/Onln -> 1, Dgrd -> 2, Offln/Pdgd -> 0, Ft -> 3, else -1.
storcli /c$c/vall show J | jq -r --arg c "$c" '
.Controllers[0].Response.Data |
to_entries[] |
select(.key | test("VD[0-9]+")) |
.value[] |
.["DG/VD"] as $dgvd |
.State as $state |
.Size as $size |
($dgvd | split("/")[1]) as $vd |
($state == "Optl" or $state == "Onln") as $is_online |
($state == "Dgrd") as $is_degraded |
($state == "Offln" or $state == "Pdgd") as $is_offline |
($state == "Ft") as $is_failed |
(if $is_online then 1
elif $is_degraded then 2
elif $is_offline then 0
elif $is_failed then 3
else -1 end) as $state_num |
"storcli_array_state{controller=\"\($c)\",vd=\"\($vd)\",size=\"\($size)\"} \($state_num)"
' 2>/dev/null >> "$OUT_FILE" || {
# fallback without jq (if it is not installed)
# NOTE(review): with pipefail set at the top of the script this branch runs on
# any failure of the storcli | jq pipeline, not only when jq is missing, and
# jq stderr is discarded - confirm that is acceptable.
storcli /c$c/vall show | awk -v c="$c" '
NR <= 8 { next }
$1 ~ /^[0-9]+$/ {
vd = $1
state = $2
size = $3
gsub(/,/,"",size)
state_num = 1
if (state == "Dgrd") state_num = 2
else if (state ~ /Offln|Pdgd/) state_num = 0
else if (state == "Ft") state_num = 3
printf "storcli_array_state{controller=\"%s\",vd=\"%s\",size=\"%s\"} %s\n", c, vd, size, state_num
}
')
echo "$vds" >> "$OUT_FILE"
' >> "$OUT_FILE"
}
done
# Atomic replace: the final permissions are applied to the temporary file
# first, so the metrics file is already readable at the instant the rename
# publishes it (previously chmod ran after mv, leaving a short window in which
# the published file still carried the permissions of the temporary file).
chmod 644 "$OUT_FILE"
mv "$OUT_FILE" "$FINAL_FILE"
# NOTE(review): the two log lines below look like the removed and the added
# variant of one line in this diff rendering - confirm which one should stay.
echo "$(date -Iseconds) storcli metrics updated" >&2
echo "$(date -Iseconds) storcli metrics updated successfully" >&2