From 94d0108766b73ff75a51d51c9293f05c36cc396e Mon Sep 17 00:00:00 2001 From: Administrator Date: Fri, 14 Nov 2025 07:18:41 +0000 Subject: [PATCH] Update 2 files - /roles/proxmox_base_setup/templates/storcli_metrics.sh.j2 - /roles/proxmox_base_setup/templates/node_exporter.service.j2 --- .../templates/node_exporter.service.j2 | 11 +- .../templates/storcli_metrics.sh.j2 | 160 ++++++++++++++---- 2 files changed, 133 insertions(+), 38 deletions(-) diff --git a/roles/proxmox_base_setup/templates/node_exporter.service.j2 b/roles/proxmox_base_setup/templates/node_exporter.service.j2 index b7eeedc..e354645 100644 --- a/roles/proxmox_base_setup/templates/node_exporter.service.j2 +++ b/roles/proxmox_base_setup/templates/node_exporter.service.j2 @@ -18,14 +18,21 @@ ExecStart=/usr/local/bin/node_exporter \ --collector.loadavg \ --collector.time \ --collector.textfile.directory=/var/lib/node_exporter/textfile_collector \ - --web.listen-address=0.0.0.0:9100 + --web.listen-address=0.0.0.0:9100 \ + --web.telemetry-path=/metrics Restart=always RestartSec=5 -# Защита от атак и случайных ошибок + +# Безопасность (Proxmox-совместимо) NoNewPrivileges=yes ProtectSystem=strict ProtectHome=yes PrivateTmp=yes +ProtectControlGroups=yes +ProtectKernelModules=yes +ProtectKernelTunables=yes +LockPersonality=yes +RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX [Install] WantedBy=multi-user.target \ No newline at end of file diff --git a/roles/proxmox_base_setup/templates/storcli_metrics.sh.j2 b/roles/proxmox_base_setup/templates/storcli_metrics.sh.j2 index 807632c..1adcf05 100644 --- a/roles/proxmox_base_setup/templates/storcli_metrics.sh.j2 +++ b/roles/proxmox_base_setup/templates/storcli_metrics.sh.j2 @@ -1,23 +1,91 @@ #!/bin/bash - if (enc != "" && slot != "") { - # Вывести предыдущий диск - gsub(/ /, "_", model); - printf "storcli_disk_temp{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, (temp ~ /^[0-9]+$/ ? temp : 0) +# Сбор метрик MegaRAID через storcli 1.17.08 → textfile_collector +# Совместим с Proxmox VE 9.0 +# Версия: 2025-11-14 + +set -euo pipefail + +OUT_FILE="/var/lib/node_exporter/textfile_collector/storcli.prom.$$" +FINAL_FILE="/var/lib/node_exporter/textfile_collector/storcli.prom" + +trap 'rm -f "$OUT_FILE"' EXIT + +{ + echo "# HELP storcli_disk_temp Temperature of physical disk in Celsius" + echo "# TYPE storcli_disk_temp gauge" + echo "# HELP storcli_disk_state Disk state: 0=Offline,1=UGood,2=Online,3=Failed,4=Rebuild,5=... (see docs)" + echo "# TYPE storcli_disk_state gauge" + echo "# HELP storcli_disk_media_error_count Media error count" + echo "# TYPE storcli_disk_media_error_count counter" + echo "# HELP storcli_disk_other_error_count Other error count" + echo "# TYPE storcli_disk_other_error_count counter" + echo "# HELP storcli_array_state Virtual drive state: 0=Offline,1=Online,2=Degraded,3=Failed" + echo "# TYPE storcli_array_state gauge" + echo "# HELP storcli_controller_temp Controller temperature (°C), if available" + echo "# TYPE storcli_controller_temp gauge" + echo "# HELP storcli_controller_bbu_state BBU state: 0=Missing/Failed,1=Good,2=Degraded" + echo "# TYPE storcli_controller_bbu_state gauge" +} > "$OUT_FILE" + +# Получаем список контроллеров (в storcli 1.17 — /call show краткий) +controllers=$(storcli /call show | awk ' + /^$/ { next } + /Controller [0-9]+/ { print $2 } +' | grep -E '^[0-9]+$') + +if [ -z "$controllers" ]; then + echo "# No controllers found" >&2 + touch "$FINAL_FILE" + exit 0 +fi + +for c in $controllers; do + # === Контроллер: температура и BBU === + ctl_info=$(storcli /c$c show J) + # В 1.17.08 JSON-режим ограничен, но /c0 show J даёт базовые поля + # Температура может быть в строке: "Controller Temperature (C): 58" + ctl_temp=$(echo "$ctl_info" | grep -i "Controller Temperature" | grep -oE '[0-9]+' | head -1) + if [ -n "$ctl_temp" ]; then + echo "storcli_controller_temp{controller=\"${c}\"} ${ctl_temp}" >> "$OUT_FILE" + fi + + # BBU state: ищем "BBU Status: Optimal" / "Failed" / "Missing" + bbu_line=$(echo "$ctl_info" | grep -i "BBU.*Status" | head -1) + bbu_state=0 + if [[ "$bbu_line" =~ Optimal|Good ]]; then + bbu_state=1 + elif [[ "$bbu_line" =~ Degraded|Weak ]]; then + bbu_state=2 + fi + echo "storcli_controller_bbu_state{controller=\"${c}\"} ${bbu_state}" >> "$OUT_FILE" + + # === Физические диски === + # В 1.17.08 /c0/eall/sall show даёт таблицу, но без заголовков — парсим по блокам + storcli /c$c/eall/sall show | awk -v c="$c" ' + BEGIN { + enc = -1; slot = -1; model = "Unknown"; temp = 0; state_num = 0; + media_err = 0; other_err = 0; in_pd = 0 + } + /^PD:/ { + if (enc != -1 && slot != -1) { + gsub(/ /, "_", model); + printf "storcli_disk_temp{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, temp printf "storcli_disk_state{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, state_num printf "storcli_disk_media_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, media_err printf "storcli_disk_other_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, other_err } - enc=""; slot=""; model=""; temp=""; media_err=0; other_err=0; state_num=0 + enc = -1; slot = -1; model = "Unknown"; temp = 0; state_num = 0; + media_err = 0; other_err = 0; in_pd = 1 } - /Enclosure Device ID:/ { enc = $4 } - /Slot Number:/ { slot = $3 } - /Device Id:/ { if (enc == "") enc = $3 } # fallback - /Model Number:/ { model = $3 } - /Drive Temperature/ { - match($0, /([0-9]+)C/); - if (RSTART) temp = substr($0, RSTART, RLENGTH-1) + in_pd && /Enclosure Device ID:/ { enc = $4 } + in_pd && /Slot Number:/ { slot = $3 } + in_pd && /Device Id:/ { if (enc == -1) enc = $3 } # fallback + in_pd && /Model Number:/ { model = $3 } + in_pd && /Drive Temperature.*C/ { + match($0, /([0-9]+)C/); + if (RSTART) temp = substr($0, RSTART, RLENGTH-1) } - /Firmware state:/ { + in_pd && /Firmware state:/ { state = $3 if (state ~ /Online/) state_num = 2 else if (state ~ /Unconfigured.*Good/) state_num = 1 @@ -26,39 +94,59 @@ else if (state ~ /Offline/) state_num = 0 else state_num = -1 } - /Media Error Count:/ { media_err = $4 } - /Other Error Count:/ { other_err = $4 } + in_pd && /Media Error Count:/ { media_err = $4 } + in_pd && /Other Error Count:/ { other_err = $4 } END { - if (enc != "" && slot != "") { + if (enc != -1 && slot != -1) { gsub(/ /, "_", model); - printf "storcli_disk_temp{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, (temp ~ /^[0-9]+$/ ? temp : 0) + printf "storcli_disk_temp{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, temp printf "storcli_disk_state{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, state_num printf "storcli_disk_media_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, media_err printf "storcli_disk_other_error_count{controller=\"%s\",enclosure=\"%s\",slot=\"%s\",model=\"%s\"} %s\n", c, enc, slot, model, other_err } } - ' c="$c") + ' >> "$OUT_FILE" - echo "$disks_info" >> "$OUT_FILE" - - # Виртуальные диски (массивы) - vds=$(storcli /c$c/vall show | awk ' - NR>8 && $1 ~ /^[0-9]+$/ { - vd = $1; state = $2; size = $3; - gsub(/,/,"", size); - # state: Optl=Online(1), Dgrd=Degraded(2), Offln=Offline(0), Ft=Failed(3) - state_num = 1; - if (state == "Dgrd") state_num = 2; - else if (state ~ /Offln|Pdgd/) state_num = 0; - else if (state == "Ft") state_num = 3; - printf "storcli_array_state{controller=\"%s\",vd=\"%s\",size=\"%s\"} %s\n", c, vd, size, state_num - } - ') - echo "$vds" >> "$OUT_FILE" + # === Виртуальные диски (массивы) === + storcli /c$c/vall show J | jq -r --arg c "$c" ' + .Controllers[0].Response.Data | + to_entries[] | + select(.key | test("VD[0-9]+")) | + .value[] | + .["DG/VD"] as $dgvd | + .State as $state | + .Size as $size | + ($dgvd | split("/")[1]) as $vd | + ($state == "Optl" or $state == "Onln") as $is_online | + ($state == "Dgrd") as $is_degraded | + ($state == "Offln" or $state == "Pdgd") as $is_offline | + ($state == "Ft") as $is_failed | + (if $is_online then 1 + elif $is_degraded then 2 + elif $is_offline then 0 + elif $is_failed then 3 + else -1 end) as $state_num | + "storcli_array_state{controller=\"\($c)\",vd=\"\($vd)\",size=\"\($size)\"} \($state_num)" + ' 2>/dev/null >> "$OUT_FILE" || { + # fallback без jq (если не установлен) + storcli /c$c/vall show | awk -v c="$c" ' + NR <= 8 { next } + $1 ~ /^[0-9]+$/ { + vd = $1 + state = $2 + size = $3 + gsub(/,/,"",size) + state_num = 1 + if (state == "Dgrd") state_num = 2 + else if (state ~ /Offln|Pdgd/) state_num = 0 + else if (state == "Ft") state_num = 3 + printf "storcli_array_state{controller=\"%s\",vd=\"%s\",size=\"%s\"} %s\n", c, vd, size, state_num + } + ' >> "$OUT_FILE" + } done -# Атомарная замена mv "$OUT_FILE" "$FINAL_FILE" chmod 644 "$FINAL_FILE" -echo "$(date -Iseconds) storcli metrics updated" >&2 \ No newline at end of file +echo "$(date -Iseconds) storcli metrics updated successfully" >&2 \ No newline at end of file