Merge branch 'monitoring' into 'main'
Update 12 files See merge request root/Olimp!3
This commit is contained in:
commit
5d3c9c5e41
@ -27,19 +27,26 @@ base_packages:
|
|||||||
- iftop
|
- iftop
|
||||||
- ntp
|
- ntp
|
||||||
- pv
|
- pv
|
||||||
|
- jq
|
||||||
|
- unzip
|
||||||
|
|
||||||
system_scripts: []
|
system_scripts: []
|
||||||
custom_directories:
|
custom_directories:
|
||||||
- /opt/scripts
|
- /opt/scripts
|
||||||
- /etc/apt/keyrings
|
- /etc/apt/keyrings
|
||||||
|
|
||||||
|
ssh_public_keys: []
|
||||||
|
- "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHvRBW+2Xpck2tznhWJyls5J/4wUoVYdyFM6JTU7uogK ansible@olimp"
|
||||||
|
- "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCbvnGZxQEGYuScClONbkbfVn2+Uo1kYYztXqMf9ku1lHkw+7IZa00LOMwv7QGBRvrtBcw+TWqaMst5FZ3R6oWcQc+nkBEYoRXe4f3AuuFAl9C9F6sEYM8fX6mAHIlWQhFyVslazZtVTQwnfRV0rnbtCduCu9liywM3fShFqBVwq7Y4nBjG648Zq+VfCHpbBE9XkZaMDyeOXdtppmLetywnBS33mbXMDgH09PMlRz097xfZLkpFdSi8WtDOtKSBiEHtZ+H0EZ42Cda2xMnqlgVtPxWGUirvv6CvDyTmuMzrjALZoSKhl3iD6Szd1YOJcAw6bv9gbJKxPkZchrB65ZXT ZailonOlimp"
|
||||||
|
|
||||||
|
|
||||||
# Удаляем мусорные пакеты везде
|
# Удаляем мусорные пакеты везде
|
||||||
cleanup_packages:
|
cleanup_packages:
|
||||||
- gparted
|
- gparted
|
||||||
|
|
||||||
pve_exporter_user: "pve_exporter@pve"
|
pve_exporter_user: "pve_exporter@pve"
|
||||||
pve_exporter_token_name: "grafana"
|
pve_exporter_token_name: "grafana"
|
||||||
pve_exporter_token_value: "93f61884-7c2f-40b6-ae6c-ab36a4eba467"
|
pve_exporter_token_value: "ae683c34-c539-4b08-b539-6c9b7e570411"
|
||||||
|
|
||||||
# ------------ gateway (192.168.1.201) ------------
|
# ------------ gateway (192.168.1.201) ------------
|
||||||
npm_base_dir: "/opt/npm"
|
npm_base_dir: "/opt/npm"
|
||||||
|
|||||||
@ -7,7 +7,7 @@
|
|||||||
- hosts: pve-server
|
- hosts: pve-server
|
||||||
roles:
|
roles:
|
||||||
- { role: proxmox_base_setup, tags: deploy_proxmox_base }
|
- { role: proxmox_base_setup, tags: deploy_proxmox_base }
|
||||||
- { role: pve_monitoring, tags: deploy_pve_monitoring }
|
- { role: proxmox_monitoring, tags: deploy_proxmox_monitoring }
|
||||||
|
|
||||||
- hosts: gateway-server
|
- hosts: gateway-server
|
||||||
roles:
|
roles:
|
||||||
|
|||||||
@ -1,14 +1,13 @@
|
|||||||
---
|
---
|
||||||
- name: restart ssh
|
- name: restart ssh
|
||||||
service:
|
systemd:
|
||||||
name: ssh
|
name: ssh
|
||||||
state: restarted
|
state: restarted
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: restart node_exporter
|
||||||
- name: Reboot system
|
systemd:
|
||||||
reboot:
|
name: node_exporter
|
||||||
msg: "Reboot triggered by base setup"
|
state: restarted
|
||||||
connect_timeout: 5
|
daemon_reload: yes
|
||||||
reboot_timeout: 300
|
become: yes
|
||||||
pre_reboot_delay: 0
|
|
||||||
post_reboot_delay: 30
|
|
||||||
@ -4,17 +4,20 @@
|
|||||||
upgrade: full
|
upgrade: full
|
||||||
update_cache: yes
|
update_cache: yes
|
||||||
cache_valid_time: 3600
|
cache_valid_time: 3600
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Install base packages
|
- name: Install base packages
|
||||||
apt:
|
apt:
|
||||||
name: "{{ base_packages }}"
|
name: "{{ base_packages }}"
|
||||||
state: present
|
state: present
|
||||||
update_cache: yes
|
update_cache: yes
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Remove unused packages
|
- name: Remove unused packages
|
||||||
apt:
|
apt:
|
||||||
autoremove: yes
|
autoremove: yes
|
||||||
autoclean: yes
|
autoclean: yes
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Disable IPv6 via sysctl
|
- name: Disable IPv6 via sysctl
|
||||||
sysctl:
|
sysctl:
|
||||||
@ -26,12 +29,14 @@
|
|||||||
loop:
|
loop:
|
||||||
- { name: 'net.ipv6.conf.all.disable_ipv6', value: '1' }
|
- { name: 'net.ipv6.conf.all.disable_ipv6', value: '1' }
|
||||||
- { name: 'net.ipv6.conf.default.disable_ipv6', value: '1' }
|
- { name: 'net.ipv6.conf.default.disable_ipv6', value: '1' }
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Ensure /root/.bashrc exists
|
- name: Ensure /root/.bashrc exists
|
||||||
file:
|
file:
|
||||||
path: /root/.bashrc
|
path: /root/.bashrc
|
||||||
state: touch
|
state: touch
|
||||||
mode: '0644'
|
mode: '0644'
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Add custom aliases and environment to ~/.bashrc
|
- name: Add custom aliases and environment to ~/.bashrc
|
||||||
blockinfile:
|
blockinfile:
|
||||||
@ -75,15 +80,18 @@
|
|||||||
export HISTTIMEFORMAT='%F %T '
|
export HISTTIMEFORMAT='%F %T '
|
||||||
owner: root
|
owner: root
|
||||||
mode: '0644'
|
mode: '0644'
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Configure timezone
|
- name: Configure timezone
|
||||||
timezone:
|
timezone:
|
||||||
name: "{{ timezone }}"
|
name: "{{ timezone }}"
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Configure locale
|
- name: Configure locale
|
||||||
locale_gen:
|
locale_gen:
|
||||||
name: "{{ system_locale }}"
|
name: "{{ system_locale }}"
|
||||||
state: present
|
state: present
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Set default locale
|
- name: Set default locale
|
||||||
lineinfile:
|
lineinfile:
|
||||||
@ -91,34 +99,37 @@
|
|||||||
line: "LANG={{ system_locale }}"
|
line: "LANG={{ system_locale }}"
|
||||||
state: present
|
state: present
|
||||||
create: yes
|
create: yes
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Ensure required directories exist
|
- name: Ensure required directories exist
|
||||||
file:
|
file:
|
||||||
path: "{{ item }}"
|
path: "{{ item }}"
|
||||||
state: directory
|
state: directory
|
||||||
mode: '0755'
|
mode: '0755'
|
||||||
loop:
|
loop: "{{ custom_directories | default([]) }}"
|
||||||
- /opt/scripts
|
become: yes
|
||||||
- /etc/apt/keyrings
|
|
||||||
|
|
||||||
- name: Install Python requests library (if needed)
|
- name: Install Python requests library (if needed)
|
||||||
apt:
|
apt:
|
||||||
name: python3-requests
|
name: python3-requests
|
||||||
state: present
|
state: present
|
||||||
when: ansible_connection != "local"
|
when: ansible_connection != "local"
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Ensure SSH directory exists
|
- name: Ensure SSH directory exists
|
||||||
file:
|
file:
|
||||||
path: /root/.ssh
|
path: /root/.ssh
|
||||||
state: directory
|
state: directory
|
||||||
mode: '0700'
|
mode: '0700'
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Add authorized key for root
|
- name: Add authorized key for root
|
||||||
authorized_key:
|
authorized_key:
|
||||||
user: root
|
user: root
|
||||||
state: present
|
state: present
|
||||||
key: "{{ item }}"
|
key: "{{ item }}"
|
||||||
loop: "{{ ssh_public_keys | default([]) }}"
|
loop: "{{ ssh_public_keys }}"
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Configure SSH security
|
- name: Configure SSH security
|
||||||
lineinfile:
|
lineinfile:
|
||||||
@ -131,75 +142,154 @@
|
|||||||
- { regexp: '^PermitRootLogin', line: 'PermitRootLogin prohibit-password' }
|
- { regexp: '^PermitRootLogin', line: 'PermitRootLogin prohibit-password' }
|
||||||
- { regexp: '^PubkeyAuthentication', line: 'PubkeyAuthentication yes' }
|
- { regexp: '^PubkeyAuthentication', line: 'PubkeyAuthentication yes' }
|
||||||
notify: restart ssh
|
notify: restart ssh
|
||||||
|
become: yes
|
||||||
|
|
||||||
# ========== Node Exporter Installation ==========
|
# ========== Node Exporter Installation ==========
|
||||||
- name: Create node_exporter system user
|
- name: Create node_exporter system user
|
||||||
ansible.builtin.user:
|
user:
|
||||||
name: node_exporter
|
name: node_exporter
|
||||||
system: yes
|
system: yes
|
||||||
shell: /usr/sbin/nologin
|
shell: /usr/sbin/nologin
|
||||||
create_home: no
|
create_home: no
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Download and extract node_exporter binary
|
- name: Download node_exporter
|
||||||
ansible.builtin.unarchive:
|
get_url:
|
||||||
src: "https://github.com/prometheus/node_exporter/releases/download/v1.8.2/node_exporter-1.8.2.linux-amd64.tar.gz"
|
url: "https://github.com/prometheus/node_exporter/releases/download/v1.8.2/node_exporter-1.8.2.linux-amd64.tar.gz"
|
||||||
dest: /tmp
|
dest: /tmp/node_exporter.tar.gz
|
||||||
|
mode: '0644'
|
||||||
|
timeout: 60
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: Create temporary extraction directory
|
||||||
|
file:
|
||||||
|
path: /tmp/node_exporter_temp
|
||||||
|
state: directory
|
||||||
|
mode: '0755'
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: Extract node_exporter
|
||||||
|
unarchive:
|
||||||
|
src: /tmp/node_exporter.tar.gz
|
||||||
|
dest: /tmp/node_exporter_temp
|
||||||
remote_src: yes
|
remote_src: yes
|
||||||
creates: /usr/local/bin/node_exporter
|
creates: /tmp/node_exporter_temp/node_exporter-1.8.2.linux-amd64/node_exporter
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Install node_exporter binary
|
- name: Install node_exporter binary
|
||||||
ansible.builtin.copy:
|
copy:
|
||||||
src: /tmp/node_exporter-1.8.2.linux-amd64/node_exporter
|
src: /tmp/node_exporter_temp/node_exporter-1.8.2.linux-amd64/node_exporter
|
||||||
dest: /usr/local/bin/node_exporter
|
dest: /usr/local/bin/node_exporter
|
||||||
owner: root
|
owner: root
|
||||||
group: root
|
group: root
|
||||||
mode: '0755'
|
mode: '0755'
|
||||||
remote_src: yes
|
remote_src: yes
|
||||||
|
become: yes
|
||||||
|
notify: restart node_exporter
|
||||||
|
|
||||||
|
- name: Clean up temporary files
|
||||||
|
file:
|
||||||
|
path: "{{ item }}"
|
||||||
|
state: absent
|
||||||
|
loop:
|
||||||
|
- /tmp/node_exporter.tar.gz
|
||||||
|
- /tmp/node_exporter_temp
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Create textfile collector directory
|
- name: Create textfile collector directory
|
||||||
ansible.builtin.file:
|
file:
|
||||||
path: /var/lib/node_exporter/textfile_collector
|
path: /var/lib/node_exporter/textfile_collector
|
||||||
state: directory
|
state: directory
|
||||||
owner: node_exporter
|
owner: node_exporter
|
||||||
group: node_exporter
|
group: node_exporter
|
||||||
mode: '0755'
|
mode: '0755'
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Deploy node_exporter systemd service
|
- name: Deploy node_exporter systemd service
|
||||||
ansible.builtin.copy:
|
copy:
|
||||||
content: |
|
content: |
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=Node Exporter
|
Description=Prometheus Node Exporter
|
||||||
|
Documentation=https://github.com/prometheus/node_exporter
|
||||||
After=network.target
|
After=network.target
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=simple
|
Type=simple
|
||||||
User=node_exporter
|
User=node_exporter
|
||||||
|
Group=node_exporter
|
||||||
ExecStart=/usr/local/bin/node_exporter \
|
ExecStart=/usr/local/bin/node_exporter \
|
||||||
--collector.systemd \
|
--collector.systemd \
|
||||||
--collector.processes \
|
--collector.processes \
|
||||||
|
--collector.cpu \
|
||||||
|
--collector.meminfo \
|
||||||
|
--collector.diskstats \
|
||||||
|
--collector.netdev \
|
||||||
|
--collector.filesystem \
|
||||||
|
--collector.loadavg \
|
||||||
|
--collector.time \
|
||||||
--collector.textfile.directory=/var/lib/node_exporter/textfile_collector \
|
--collector.textfile.directory=/var/lib/node_exporter/textfile_collector \
|
||||||
--web.listen-address=:9100
|
--web.listen-address=0.0.0.0:9100 \
|
||||||
|
--web.telemetry-path=/metrics
|
||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=5
|
RestartSec=5
|
||||||
|
|
||||||
|
# Security settings
|
||||||
|
NoNewPrivileges=yes
|
||||||
|
ProtectSystem=strict
|
||||||
|
ProtectHome=yes
|
||||||
|
PrivateTmp=yes
|
||||||
|
ProtectControlGroups=yes
|
||||||
|
ProtectKernelModules=yes
|
||||||
|
ProtectKernelTunables=yes
|
||||||
|
LockPersonality=yes
|
||||||
|
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
dest: /etc/systemd/system/node_exporter.service
|
dest: /etc/systemd/system/node_exporter.service
|
||||||
owner: root
|
owner: root
|
||||||
group: root
|
group: root
|
||||||
mode: '0644'
|
mode: '0644'
|
||||||
|
become: yes
|
||||||
|
notify: restart node_exporter
|
||||||
|
|
||||||
- name: Reload systemd and start node_exporter
|
- name: Start and enable node_exporter
|
||||||
ansible.builtin.systemd:
|
systemd:
|
||||||
name: node_exporter
|
name: node_exporter
|
||||||
state: started
|
state: started
|
||||||
enabled: yes
|
enabled: yes
|
||||||
daemon_reload: yes
|
daemon_reload: yes
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: Wait for node_exporter to start
|
||||||
|
wait_for:
|
||||||
|
host: localhost
|
||||||
|
port: 9100
|
||||||
|
timeout: 30
|
||||||
|
state: started
|
||||||
|
delay: 5
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: Verify node_exporter is responding
|
||||||
|
uri:
|
||||||
|
url: http://localhost:9100/metrics
|
||||||
|
status_code: 200
|
||||||
|
timeout: 10
|
||||||
|
register: node_exporter_check
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: Show node_exporter status
|
||||||
|
debug:
|
||||||
|
msg: "Node Exporter is running and responding on port 9100"
|
||||||
|
when: node_exporter_check.status == 200
|
||||||
|
|
||||||
- name: Allow port 9100 in ufw (if enabled)
|
- name: Allow port 9100 in ufw (if enabled)
|
||||||
ansible.builtin.ufw:
|
ufw:
|
||||||
rule: allow
|
rule: allow
|
||||||
port: 9100
|
port: 9100
|
||||||
proto: tcp
|
proto: tcp
|
||||||
comment: "Prometheus Node Exporter"
|
comment: "Prometheus Node Exporter"
|
||||||
when: ansible_facts.services["ufw.service"] is defined and ansible_facts.services["ufw.service"]["state"] == "running"
|
when:
|
||||||
|
- ansible_facts.services["ufw.service"] is defined
|
||||||
|
- ansible_facts.services["ufw.service"]["state"] == "running"
|
||||||
|
become: yes
|
||||||
@ -1,9 +1,60 @@
|
|||||||
global:
|
global:
|
||||||
scrape_interval: 30s
|
scrape_interval: 30s
|
||||||
|
external_labels:
|
||||||
|
cluster: 'olimp'
|
||||||
|
environment: 'production'
|
||||||
|
|
||||||
scrape_configs:
|
scrape_configs:
|
||||||
|
# ========== Proxmox серверы ==========
|
||||||
|
- job_name: 'proxmox'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['192.168.1.200:9223']
|
||||||
|
metrics_path: /pve
|
||||||
|
params:
|
||||||
|
module: [default]
|
||||||
|
scrape_interval: 30s
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__address__]
|
||||||
|
target_label: instance
|
||||||
|
replacement: '192.168.1.200:9223'
|
||||||
|
|
||||||
|
# ========== Node Exporter со всех серверов ==========
|
||||||
- job_name: 'node'
|
- job_name: 'node'
|
||||||
static_configs:
|
static_configs:
|
||||||
{%- for host in groups['all'] %}
|
- targets:
|
||||||
- targets: ['{{ hostvars[host].int_ip }}:9100']
|
- '192.168.1.200:9100' # Proxmox
|
||||||
{%- endfor %}
|
- '192.168.1.201:9100' # Gateway
|
||||||
|
- '192.168.1.202:9100' # Data
|
||||||
|
- '192.168.1.203:9100' # Media
|
||||||
|
- '192.168.1.204:9100' # Photo
|
||||||
|
- '192.168.1.205:9100' # Nextcloud
|
||||||
|
- '192.168.1.206:9100' # Talk
|
||||||
|
- '192.168.1.207:9100' # Games
|
||||||
|
- '192.168.1.228:9100' # Manage
|
||||||
|
scrape_interval: 30s
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__address__]
|
||||||
|
target_label: instance
|
||||||
|
regex: '(.*):9100'
|
||||||
|
replacement: '${1}'
|
||||||
|
|
||||||
|
# ========== Self-monitoring vmagent ==========
|
||||||
|
- job_name: 'vmagent'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['vmagent:8429']
|
||||||
|
scrape_interval: 30s
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__address__]
|
||||||
|
target_label: instance
|
||||||
|
replacement: 'vmagent'
|
||||||
|
|
||||||
|
# ========== VictoriaMetrics self-monitoring ==========
|
||||||
|
- job_name: 'victoriametrics'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['victoriametrics:8428']
|
||||||
|
scrape_interval: 30s
|
||||||
|
metrics_path: /metrics
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__address__]
|
||||||
|
target_label: instance
|
||||||
|
replacement: 'victoriametrics'
|
||||||
@ -24,9 +24,12 @@ services:
|
|||||||
- '-promscrape.config=/config/vmagent.yaml'
|
- '-promscrape.config=/config/vmagent.yaml'
|
||||||
- '-remoteWrite.tmpDataPath=/tmpData'
|
- '-remoteWrite.tmpDataPath=/tmpData'
|
||||||
- '-remoteWrite.url=http://victoriametrics:8428/api/v1/write'
|
- '-remoteWrite.url=http://victoriametrics:8428/api/v1/write'
|
||||||
|
- '-promscrape.suppressScrapeErrors=false'
|
||||||
volumes:
|
volumes:
|
||||||
- {{ grafana_vmagent_config }}:/config/vmagent.yaml:ro
|
- {{ grafana_vmagent_config }}:/config/vmagent.yaml:ro
|
||||||
- {{ grafana_vmagent_tmp_dir }}:/tmpData
|
- {{ grafana_vmagent_tmp_dir }}:/tmpData
|
||||||
|
ports:
|
||||||
|
- "8429:8429" # Для мониторинга самого vmagent
|
||||||
depends_on:
|
depends_on:
|
||||||
- victoriametrics
|
- victoriametrics
|
||||||
networks:
|
networks:
|
||||||
|
|||||||
@ -150,153 +150,3 @@
|
|||||||
- { key: 'PubkeyAuthentication', value: 'yes' }
|
- { key: 'PubkeyAuthentication', value: 'yes' }
|
||||||
- { key: 'X11Forwarding', value: 'no' }
|
- { key: 'X11Forwarding', value: 'no' }
|
||||||
notify: restart ssh
|
notify: restart ssh
|
||||||
|
|
||||||
# ========== Node Exporter ==========
|
|
||||||
- name: Create node_exporter user
|
|
||||||
user:
|
|
||||||
name: node_exporter
|
|
||||||
system: yes
|
|
||||||
shell: /usr/sbin/nologin
|
|
||||||
create_home: no
|
|
||||||
|
|
||||||
- name: Download node_exporter
|
|
||||||
get_url:
|
|
||||||
url: "https://github.com/prometheus/node_exporter/releases/download/v1.8.2/node_exporter-1.8.2.linux-amd64.tar.gz"
|
|
||||||
dest: /tmp/node_exporter-1.8.2.linux-amd64.tar.gz
|
|
||||||
checksum: "sha256:https://github.com/prometheus/node_exporter/releases/download/v1.8.2/sha256sums.txt"
|
|
||||||
mode: '0644'
|
|
||||||
timeout: 60
|
|
||||||
|
|
||||||
- name: Extract node_exporter
|
|
||||||
unarchive:
|
|
||||||
src: /tmp/node_exporter-1.8.2.linux-amd64.tar.gz
|
|
||||||
dest: /tmp/
|
|
||||||
remote_src: yes
|
|
||||||
creates: /tmp/node_exporter-1.8.2.linux-amd64/node_exporter
|
|
||||||
|
|
||||||
- name: Install node_exporter binary
|
|
||||||
copy:
|
|
||||||
src: /tmp/node_exporter-1.8.2.linux-amd64/node_exporter
|
|
||||||
dest: /usr/local/bin/node_exporter
|
|
||||||
owner: root
|
|
||||||
group: root
|
|
||||||
mode: '0755'
|
|
||||||
remote_src: yes
|
|
||||||
notify: restart node_exporter
|
|
||||||
|
|
||||||
- name: Create textfile_collector directory
|
|
||||||
file:
|
|
||||||
path: /var/lib/node_exporter/textfile_collector
|
|
||||||
state: directory
|
|
||||||
owner: node_exporter
|
|
||||||
group: node_exporter
|
|
||||||
mode: '0755'
|
|
||||||
|
|
||||||
- name: Deploy node_exporter systemd service
|
|
||||||
template:
|
|
||||||
src: node_exporter.service.j2
|
|
||||||
dest: /etc/systemd/system/node_exporter.service
|
|
||||||
owner: root
|
|
||||||
group: root
|
|
||||||
mode: '0644'
|
|
||||||
notify: restart node_exporter
|
|
||||||
|
|
||||||
# ========== storcli — проверка и сбор метрик (без установки) ==========
|
|
||||||
- name: Detect MegaRAID controller
|
|
||||||
command: "lspci -d 1000:"
|
|
||||||
register: lspci_megaraid
|
|
||||||
ignore_errors: yes
|
|
||||||
changed_when: false
|
|
||||||
|
|
||||||
- name: Check storcli binary exists
|
|
||||||
stat:
|
|
||||||
path: /opt/MegaRAID/storcli/storcli64
|
|
||||||
register: storcli_bin
|
|
||||||
when: lspci_megaraid.rc == 0
|
|
||||||
|
|
||||||
- name: Ensure storcli symlink in PATH
|
|
||||||
file:
|
|
||||||
src: /opt/MegaRAID/storcli/storcli64
|
|
||||||
dest: /usr/local/bin/storcli
|
|
||||||
state: link
|
|
||||||
force: yes
|
|
||||||
when:
|
|
||||||
- lspci_megaraid.rc == 0
|
|
||||||
- storcli_bin.stat.exists
|
|
||||||
|
|
||||||
- name: Verify storcli functionality
|
|
||||||
command: storcli /call show
|
|
||||||
register: storcli_test
|
|
||||||
changed_when: false
|
|
||||||
failed_when:
|
|
||||||
- storcli_test.rc != 0
|
|
||||||
- "'Controller' not in storcli_test.stdout"
|
|
||||||
when:
|
|
||||||
- lspci_megaraid.rc == 0
|
|
||||||
- storcli_bin.stat.exists
|
|
||||||
|
|
||||||
- name: Set fact — storcli is available
|
|
||||||
set_fact:
|
|
||||||
storcli_available: true
|
|
||||||
when:
|
|
||||||
- lspci_megaraid.rc == 0
|
|
||||||
- storcli_bin.stat.exists
|
|
||||||
- storcli_test is success
|
|
||||||
|
|
||||||
- name: Set fact — storcli NOT available
|
|
||||||
set_fact:
|
|
||||||
storcli_available: false
|
|
||||||
when: not (lspci_megaraid.rc == 0 and storcli_bin.stat.exists)
|
|
||||||
|
|
||||||
# Сбор метрик ТОЛЬКО при наличии storcli
|
|
||||||
- name: Deploy storcli metrics script
|
|
||||||
template:
|
|
||||||
src: storcli_metrics.sh.j2
|
|
||||||
dest: /opt/scripts/storcli_metrics.sh
|
|
||||||
owner: root
|
|
||||||
group: root
|
|
||||||
mode: '0755'
|
|
||||||
when: storcli_available | default(false)
|
|
||||||
|
|
||||||
- name: Deploy storcli_metrics systemd units
|
|
||||||
block:
|
|
||||||
- name: Create storcli_metrics.service
|
|
||||||
copy:
|
|
||||||
content: |
|
|
||||||
[Unit]
|
|
||||||
Description=Collect RAID/disk metrics via storcli
|
|
||||||
After=network.target
|
|
||||||
[Service]
|
|
||||||
Type=oneshot
|
|
||||||
ExecStart=/opt/scripts/storcli_metrics.sh
|
|
||||||
User=root
|
|
||||||
StandardOutput=journal
|
|
||||||
StandardError=journal
|
|
||||||
dest: /etc/systemd/system/storcli_metrics.service
|
|
||||||
owner: root
|
|
||||||
mode: '0644'
|
|
||||||
|
|
||||||
- name: Create storcli_metrics.timer (every 5 min)
|
|
||||||
copy:
|
|
||||||
content: |
|
|
||||||
[Unit]
|
|
||||||
Description=Run storcli metrics collector every 5 minutes
|
|
||||||
Requires=storcli_metrics.service
|
|
||||||
[Timer]
|
|
||||||
OnBootSec=60
|
|
||||||
OnUnitActiveSec=5m
|
|
||||||
AccuracySec=1s
|
|
||||||
[Install]
|
|
||||||
WantedBy=timers.target
|
|
||||||
dest: /etc/systemd/system/storcli_metrics.timer
|
|
||||||
owner: root
|
|
||||||
mode: '0644'
|
|
||||||
|
|
||||||
- name: Enable & start storcli_metrics.timer
|
|
||||||
systemd:
|
|
||||||
name: storcli_metrics.timer
|
|
||||||
state: started
|
|
||||||
enabled: yes
|
|
||||||
daemon_reload: yes
|
|
||||||
when: storcli_available | default(false)
|
|
||||||
notify: restart storcli_metrics
|
|
||||||
19
roles/proxmox_monitoring/handlers/main.yml
Normal file
19
roles/proxmox_monitoring/handlers/main.yml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
---
|
||||||
|
# Handler: restart node_exporter when a notifying task reports a change.
# daemon_reload makes systemd re-read unit files before the restart, and
# enabled keeps the service enabled at boot.
- name: restart node_exporter
  ansible.builtin.systemd:
    name: node_exporter
    state: restarted
    daemon_reload: true
    enabled: true
  # The role's privileged tasks each set become explicitly; without it here
  # the restart fails whenever the connection user is not root.
  become: true
|
||||||
|
|
||||||
|
# Handler: restart pve_exporter when a notifying task reports a change.
# daemon_reload makes systemd re-read unit files before the restart.
- name: restart pve_exporter
  ansible.builtin.systemd:
    name: pve_exporter
    state: restarted
    daemon_reload: true
  # The role's privileged tasks each set become explicitly; without it here
  # the restart fails whenever the connection user is not root.
  become: true
|
||||||
|
|
||||||
|
# Handler: restart the storcli_metrics timer unit (not the oneshot service)
# when a notifying task reports a change.
- name: restart storcli_metrics
  ansible.builtin.systemd:
    name: storcli_metrics.timer
    state: restarted
    daemon_reload: true
  # The role's privileged tasks each set become explicitly; without it here
  # the restart fails whenever the connection user is not root.
  become: true
|
||||||
306
roles/proxmox_monitoring/tasks/main.yml
Normal file
306
roles/proxmox_monitoring/tasks/main.yml
Normal file
@ -0,0 +1,306 @@
|
|||||||
|
---
|
||||||
|
# Refresh the apt package index; the refresh is skipped while the cached
# index is younger than 86400 seconds (24 hours).
- name: Update package cache
  become: true
  ansible.builtin.apt:
    cache_valid_time: 86400
    update_cache: true
|
||||||
|
|
||||||
|
# Make sure the packages listed below are installed through apt.
- name: Install monitoring dependencies
  become: true
  ansible.builtin.apt:
    state: present
    name:
      - python3
      - python3-pip
      - python3-venv
      - curl
      - wget
      - jq
      - smartmontools
|
||||||
|
|
||||||
|
# ========== 1. Установка storcli с проверкой ==========
|
||||||
|
# Record whether the storcli64 binary is already on disk; the registered
# result gates the download and install tasks.
- name: Check if storcli is already installed
  become: true
  ansible.builtin.stat:
    path: /opt/MegaRAID/storcli/storcli64
  register: storcli_installed
|
||||||
|
|
||||||
|
# Fetch the storcli .deb package into /tmp, only when the binary was not
# found by the preceding stat check.
- name: Download storcli
  become: true
  when: not storcli_installed.stat.exists
  ansible.builtin.get_url:
    url: "https://docs.broadcom.com/docs-and-downloads/raid-controllers/raid-controllers-common-files/storcli_1.24.02-1_all.deb"
    dest: /tmp/storcli.deb
    mode: "0644"
|
||||||
|
|
||||||
|
# Install the downloaded .deb, only when the binary was not found by the
# preceding stat check.
- name: Install storcli
  become: true
  when: not storcli_installed.stat.exists
  ansible.builtin.apt:
    deb: /tmp/storcli.deb
    state: present
|
||||||
|
|
||||||
|
# Expose storcli64 on PATH as /usr/local/bin/storcli.
# The former condition "exists or not exists" was always true, so the task
# is simply unconditional; force replaces whatever is already at dest.
- name: Create storcli symlink
  ansible.builtin.file:
    src: /opt/MegaRAID/storcli/storcli64
    dest: /usr/local/bin/storcli
    state: link
    force: true
  become: true
|
||||||
|
|
||||||
|
# Probe controller 0 with storcli and keep the result in storcli_test.
# A non-zero exit status is recorded rather than treated as fatal: the
# following tasks branch on storcli_test.rc and on
# "storcli_available | default(false)", which only makes sense if the play
# keeps running when the probe fails.
- name: Verify storcli works
  ansible.builtin.command: /opt/MegaRAID/storcli/storcli64 /c0 show
  register: storcli_test
  # Read-only probe: never report a change.
  changed_when: false
  failed_when: false
  become: true
|
||||||
|
|
||||||
|
# Flag storcli as usable when the probe command exited with status 0.
# The fact stays undefined otherwise; consumers read it through
# "default(false)".
- name: Set fact — storcli is available
  when: storcli_test.rc == 0
  ansible.builtin.set_fact:
    storcli_available: true
|
||||||
|
|
||||||
|
# ========== 2. Node Exporter ==========
|
||||||
|
# System account for node_exporter: nologin shell, no home directory.
- name: Create node_exporter user
  become: true
  ansible.builtin.user:
    name: node_exporter
    shell: /usr/sbin/nologin
    system: true
    create_home: false
|
||||||
|
|
||||||
|
# Download the node_exporter 1.8.2 release tarball into /tmp; the checksum
# is taken from the release's sha256sums.txt.
- name: Download node_exporter
  become: true
  ansible.builtin.get_url:
    url: "https://github.com/prometheus/node_exporter/releases/download/v1.8.2/node_exporter-1.8.2.linux-amd64.tar.gz"
    checksum: "sha256:https://github.com/prometheus/node_exporter/releases/download/v1.8.2/sha256sums.txt"
    dest: /tmp/node_exporter-1.8.2.linux-amd64.tar.gz
    mode: "0644"
    timeout: 60
|
||||||
|
|
||||||
|
# Unpack the tarball on the managed host (remote_src); skipped once the
# extracted binary named in "creates" exists.
- name: Extract node_exporter
  become: true
  ansible.builtin.unarchive:
    remote_src: true
    src: /tmp/node_exporter-1.8.2.linux-amd64.tar.gz
    dest: /tmp/
    creates: /tmp/node_exporter-1.8.2.linux-amd64/node_exporter
|
||||||
|
|
||||||
|
# Copy the extracted binary (already on the managed host) to
# /usr/local/bin and notify the restart handler when it changes.
- name: Install node_exporter binary
  become: true
  ansible.builtin.copy:
    remote_src: true
    src: /tmp/node_exporter-1.8.2.linux-amd64/node_exporter
    dest: /usr/local/bin/node_exporter
    owner: root
    group: root
    mode: "0755"
  notify: restart node_exporter
|
||||||
|
|
||||||
|
# Directory owned by node_exporter, at the path the unit template passes as
# --collector.textfile.directory.
- name: Create textfile_collector directory
  become: true
  ansible.builtin.file:
    state: directory
    path: /var/lib/node_exporter/textfile_collector
    owner: node_exporter
    group: node_exporter
    mode: "0755"
|
||||||
|
|
||||||
|
# Render the unit file from node_exporter.service.j2 and notify the restart
# handler when the rendered file changes.
- name: Deploy node_exporter systemd service
  become: true
  ansible.builtin.template:
    src: node_exporter.service.j2
    dest: /etc/systemd/system/node_exporter.service
    owner: root
    group: root
    mode: "0644"
  notify: restart node_exporter
|
||||||
|
|
||||||
|
# ========== 3. PVE Exporter ==========
|
||||||
|
# System account for pve_exporter: nologin shell, no home directory.
- name: Create pve_exporter user
  become: true
  ansible.builtin.user:
    name: pve_exporter
    shell: /usr/sbin/nologin
    system: true
    create_home: false
|
||||||
|
|
||||||
|
# Create the exporter's base and config directories, owned by pve_exporter.
- name: Create pve_exporter directories
  become: true
  ansible.builtin.file:
    state: directory
    path: "{{ item }}"
    owner: pve_exporter
    group: pve_exporter
    mode: "0755"
  loop:
    - /opt/pve_exporter
    - /opt/pve_exporter/config
|
||||||
|
|
||||||
|
# Create the venv as the pve_exporter user; skipped once the venv's python
# binary exists. HOME is pointed at /opt/pve_exporter for the command.
- name: Create Python virtual environment
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter
  ansible.builtin.command:
    cmd: python3 -m venv /opt/pve_exporter/venv
    creates: /opt/pve_exporter/venv/bin/python
|
||||||
|
|
||||||
|
# Upgrade pip inside the venv as the pve_exporter user.
# The command itself is unchanged; its output is now registered so the task
# reports "changed" only when pip actually installed something, instead of
# on every run.
- name: Upgrade pip in virtual environment
  ansible.builtin.command:
    cmd: /opt/pve_exporter/venv/bin/pip install --upgrade pip
    chdir: /opt/pve_exporter
  register: pve_exporter_pip_upgrade
  # pip prints "Successfully installed ..." only when it installed a package.
  changed_when: "'Successfully installed' in pve_exporter_pip_upgrade.stdout"
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter
|
||||||
|
|
||||||
|
# Install prometheus-pve-exporter into the venv as the pve_exporter user.
# The command itself is unchanged; its output is now registered so the task
# reports "changed" only when pip actually installed something, instead of
# on every run.
- name: Install prometheus-pve-exporter package
  ansible.builtin.command:
    cmd: /opt/pve_exporter/venv/bin/pip install prometheus-pve-exporter
    chdir: /opt/pve_exporter
  register: pve_exporter_pip_install
  # pip prints "Successfully installed ..." only when it installed a package.
  changed_when: "'Successfully installed' in pve_exporter_pip_install.stdout"
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter
|
||||||
|
|
||||||
|
# Render the exporter's config (API-token credentials) from
# pve_exporter_config.yml.j2. Mode 0600 keeps the token readable by the
# pve_exporter user only.
- name: Deploy pve_exporter config
  ansible.builtin.template:
    src: pve_exporter_config.yml.j2
    dest: /opt/pve_exporter/config/config.yml
    owner: pve_exporter
    group: pve_exporter
    mode: "0600"
  become: true
  # Without this, a changed config (for example a rotated token) would not
  # take effect until the service was restarted by hand.
  notify: restart pve_exporter
|
||||||
|
|
||||||
|
# Write the pve_exporter unit file from inline content and notify the
# restart handler when the file changes. The unit runs the venv's
# pve_exporter as user pve_exporter, listening on 0.0.0.0:9223.
- name: Create pve_exporter systemd service
  become: true
  ansible.builtin.copy:
    dest: /etc/systemd/system/pve_exporter.service
    mode: "0644"
    content: |
      [Unit]
      Description=Proxmox VE Exporter
      After=network.target

      [Service]
      Type=simple
      User=pve_exporter
      WorkingDirectory=/opt/pve_exporter
      Environment="HOME=/opt/pve_exporter"
      ExecStart=/opt/pve_exporter/venv/bin/pve_exporter \
        --web.listen-address=0.0.0.0:9223 \
        --config.file=/opt/pve_exporter/config/config.yml
      Restart=always
      RestartSec=10
      StandardOutput=journal
      StandardError=journal
      SyslogIdentifier=pve_exporter

      [Install]
      WantedBy=multi-user.target
  notify: restart pve_exporter
|
||||||
|
|
||||||
|
# ========== 4. StorCLI Metrics ==========
|
||||||
|
# Render the metrics script into /opt/scripts; runs only when the
# storcli_available fact is set and true.
- name: Deploy storcli metrics script
  become: true
  when: storcli_available | default(false)
  ansible.builtin.template:
    src: storcli_metrics.sh.j2
    dest: /opt/scripts/storcli_metrics.sh
    owner: root
    group: root
    mode: "0755"
|
||||||
|
|
||||||
|
# Install a oneshot service plus a timer that run the storcli metrics
# script, then enable and start the timer. The whole block runs only when
# the storcli_available fact is set and true; when/become/notify are set at
# block level and apply to every task inside it.
- name: Deploy storcli_metrics systemd units
  when: storcli_available | default(false)
  become: true
  notify: restart storcli_metrics
  block:
    # Oneshot service: a single run of the metrics script as root.
    - name: Create storcli_metrics.service
      ansible.builtin.copy:
        dest: /etc/systemd/system/storcli_metrics.service
        owner: root
        mode: "0644"
        content: |
          [Unit]
          Description=Collect RAID/disk metrics via storcli
          After=network.target
          [Service]
          Type=oneshot
          ExecStart=/opt/scripts/storcli_metrics.sh
          User=root
          StandardOutput=journal
          StandardError=journal

    # Timer: first run 60 s after boot, then 5 minutes after each activation.
    - name: Create storcli_metrics.timer (every 5 min)
      ansible.builtin.copy:
        dest: /etc/systemd/system/storcli_metrics.timer
        owner: root
        mode: "0644"
        content: |
          [Unit]
          Description=Run storcli metrics collector every 5 minutes
          Requires=storcli_metrics.service
          [Timer]
          OnBootSec=60
          OnUnitActiveSec=5m
          AccuracySec=1s
          [Install]
          WantedBy=timers.target

    # daemon_reload makes systemd pick up the unit files written above.
    - name: Enable & start storcli_metrics.timer
      ansible.builtin.systemd:
        name: storcli_metrics.timer
        enabled: true
        state: started
        daemon_reload: true
|
||||||
|
|
||||||
|
# ========== 5. Запуск и проверка сервисов ==========
|
||||||
|
# Enable both exporters at boot and make sure they are running;
# daemon_reload makes systemd re-read unit files first.
- name: Start and enable all services
  become: true
  ansible.builtin.systemd:
    name: "{{ item }}"
    enabled: true
    state: started
    daemon_reload: true
  loop:
    - node_exporter
    - pve_exporter
|
||||||
|
|
||||||
|
# Wait (5 s initial delay, 30 s timeout) for each exporter's port to accept
# connections on localhost. failed_when: false means a timeout here never
# fails the play.
- name: Wait for services to initialize
  become: true
  ansible.builtin.wait_for:
    host: localhost
    port: "{{ item.port }}"
    state: started
    delay: 5
    timeout: 30
  loop:
    - port: 9100
      service: "node_exporter"
    - port: 9223
      service: "pve_exporter"
  failed_when: false
|
||||||
|
|
||||||
|
# Request /metrics from each exporter and require HTTP 200; the per-item
# results are registered as service_checks.
- name: Verify services are responding
  become: true
  ansible.builtin.uri:
    url: "http://localhost:{{ item.port }}/metrics"
    status_code: 200
    timeout: 10
  loop:
    - port: 9100
      service: "node_exporter"
    - port: 9223
      service: "pve_exporter"
  register: service_checks
|
||||||
|
|
||||||
|
# Print one "<service> - <HTTP status>" line per registered check result.
- name: Show service status
  loop: "{{ service_checks.results }}"
  loop_control:
    label: "{{ item.item.service }}"
  ansible.builtin.debug:
    msg: "{{ item.item.service }} - {{ item.status }}"
|
||||||
|
|
||||||
|
# Run the metrics script once during the play, only when the
# storcli_available fact is set and true; never reported as a change.
- name: Run initial storcli metrics collection
  become: true
  when: storcli_available | default(false)
  changed_when: false
  ansible.builtin.command: /opt/scripts/storcli_metrics.sh
|
||||||
38
roles/proxmox_monitoring/templates/node_exporter.service.j2
Normal file
38
roles/proxmox_monitoring/templates/node_exporter.service.j2
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
# systemd unit for Prometheus node_exporter.
# Runs /usr/local/bin/node_exporter as the unprivileged node_exporter user,
# listening on 0.0.0.0:9100 and additionally reading *.prom files from the
# textfile collector directory given below.
[Unit]
Description=Prometheus Node Exporter
Documentation=https://github.com/prometheus/node_exporter
After=network.target

[Service]
Type=simple
User=node_exporter
Group=node_exporter
ExecStart=/usr/local/bin/node_exporter \
    --collector.systemd \
    --collector.processes \
    --collector.cpu \
    --collector.meminfo \
    --collector.diskstats \
    --collector.netdev \
    --collector.filesystem \
    --collector.loadavg \
    --collector.time \
    --collector.textfile.directory=/var/lib/node_exporter/textfile_collector \
    --web.listen-address=0.0.0.0:9100 \
    --web.telemetry-path=/metrics
Restart=always
RestartSec=5

# Sandboxing (Proxmox-compatible)
NoNewPrivileges=yes
ProtectSystem=strict
ProtectHome=yes
PrivateTmp=yes
ProtectControlGroups=yes
ProtectKernelModules=yes
ProtectKernelTunables=yes
LockPersonality=yes
# AF_NETLINK is allowed in addition to the IP and UNIX families because
# recent node_exporter releases gather network interface statistics over
# netlink; without it the netdev collector cannot open its socket.
# NOTE(review): confirm against the node_exporter version actually installed.
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX AF_NETLINK

[Install]
WantedBy=multi-user.target
|
||||||
@ -0,0 +1,5 @@
|
|||||||
|
# Connection settings under the "default" key; the three credentials are
# filled in from Ansible variables when the template is rendered.
default:
  username: "{{ pve_exporter_user }}"
  token_name: "{{ pve_exporter_token_name }}"
  token_value: "{{ pve_exporter_token_value }}"
  # NOTE(review): TLS certificate verification is disabled here; confirm this
  # is intended for the target host.
  verify_ssl: false
|
||||||
78
roles/proxmox_monitoring/templates/storcli_metrics.sh.j2
Normal file
78
roles/proxmox_monitoring/templates/storcli_metrics.sh.j2
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
#!/bin/bash
#
# Export MegaRAID (storcli) physical-disk and virtual-drive health as
# Prometheus metrics for the node_exporter textfile collector.
#
# Output: /var/lib/node_exporter/textfile_collector/storcli.prom
#   storcli_disk_state              per-disk state code (see HELP text)
#   storcli_disk_temp               per-disk temperature, Celsius
#   storcli_disk_media_error_count  per-disk media error count
#   storcli_disk_other_error_count  per-disk other error count
#   storcli_array_state             virtual drive 0 state code (see HELP text)
#
# Controller 0, enclosure 252, slots 0-3 and the disk model label are
# hard-coded. Progress messages go to stdout.

OUT_FILE="/var/lib/node_exporter/textfile_collector/storcli.prom"
STORCLI="/opt/MegaRAID/storcli/storcli64"

# Build the new content in a temporary file next to the target and rename it
# into place at the end, so node_exporter never scrapes a half-written file.
# The random suffix keeps the temporary file from matching "*.prom".
TMP_FILE=$(mktemp "${OUT_FILE}.XXXXXX") || exit 1
trap 'rm -f "$TMP_FILE"' EXIT

# Metric headers
cat > "$TMP_FILE" <<'EOF'
# HELP storcli_disk_temp Temperature of physical disk in Celsius
# TYPE storcli_disk_temp gauge
# HELP storcli_disk_state Disk state: 0=Offline,1=UGood,2=Online,3=Failed,4=Rebuild
# TYPE storcli_disk_state gauge
# HELP storcli_disk_media_error_count Media error count
# TYPE storcli_disk_media_error_count counter
# HELP storcli_disk_other_error_count Other error count
# TYPE storcli_disk_other_error_count counter
# HELP storcli_array_state Virtual drive state: 0=Offline,1=Online,2=Degraded,3=Failed
# TYPE storcli_array_state gauge
EOF

# Disk states from the summary table rows ("252:<slot> <DID> <State> ...").
# [ \t]+ is used instead of \s so the pattern also works with mawk.
# "Rbld" is the abbreviation storcli is expected to print for a rebuilding
# disk; the long form "Rebuild" is still accepted.
"$STORCLI" /c0/eall/sall show all | awk '
  /^252:[0-9]+[ \t]+/ {
    split($1, parts, ":")
    slot = parts[2]
    did = $2
    state = $3

    state_num = 2   # any other state is reported as Online
    if (state == "Offln") state_num = 0
    else if (state == "UGood") state_num = 1
    else if (state == "Failed") state_num = 3
    else if (state == "Rbld" || state == "Rebuild") state_num = 4

    printf "storcli_disk_state{controller=\"0\",enclosure=\"252\",slot=\"%s\",did=\"%s\",model=\"ST8000NM0075\"} %s\n", slot, did, state_num
  }
' >> "$TMP_FILE"

# Temperature and error counters for each disk
for slot in 0 1 2 3; do
  echo "Processing disk slot $slot..."
  disk_info=$("$STORCLI" "/c0/e252/s$slot" show all)

  # Temperature: first integer on the "Drive Temperature" line
  temp=$(echo "$disk_info" | grep "Drive Temperature" | grep -oE '[0-9]+' | head -1)
  if [ -n "$temp" ]; then
    echo "storcli_disk_temp{controller=\"0\",enclosure=\"252\",slot=\"$slot\",model=\"ST8000NM0075\"} $temp" >> "$TMP_FILE"
    echo "  Temperature: $temp°C"
  fi

  # Error counters: first word after "=" on the matching line
  media_errors=$(echo "$disk_info" | grep "Media Error Count" | awk -F= '{print $2}' | awk '{print $1}')
  other_errors=$(echo "$disk_info" | grep "Other Error Count" | awk -F= '{print $2}' | awk '{print $1}')

  # Fall back to 0 when the value is missing or not a plain integer; one
  # malformed sample would otherwise make the whole .prom file unparsable.
  [[ "$media_errors" =~ ^[0-9]+$ ]] || media_errors=0
  [[ "$other_errors" =~ ^[0-9]+$ ]] || other_errors=0

  echo "storcli_disk_media_error_count{controller=\"0\",enclosure=\"252\",slot=\"$slot\",model=\"ST8000NM0075\"} $media_errors" >> "$TMP_FILE"
  echo "storcli_disk_other_error_count{controller=\"0\",enclosure=\"252\",slot=\"$slot\",model=\"ST8000NM0075\"} $other_errors" >> "$TMP_FILE"

  echo "  Media errors: $media_errors, Other errors: $other_errors"
done

# Virtual drive (array) state: third column of the "0/0" row
array_line=$("$STORCLI" /c0/vall show | grep "^0/0")
array_state=$(echo "$array_line" | awk '{print $3}')

if [ -z "$array_state" ]; then
  # No row for VD 0/0 (storcli failed or printed nothing): omit the sample
  # instead of reporting a healthy array.
  echo "WARNING: could not read state of virtual drive 0/0; storcli_array_state not written" >&2
else
  case "$array_state" in
    OfLn|Offln) state_num=0 ;;   # offline ("OfLn" is the expected VD spelling)
    Dgrd|Pdgd)  state_num=2 ;;   # degraded / partially degraded
    Ft)         state_num=3 ;;   # failed
    *)          state_num=1 ;;   # anything else is reported as Online
  esac

  echo "storcli_array_state{controller=\"0\",vd=\"0\"} $state_num" >> "$TMP_FILE"
  echo "Array state: $array_state ($state_num)"
fi

# Publish: make the file world-readable, then rename it over the old one.
chmod 644 "$TMP_FILE"
mv -f "$TMP_FILE" "$OUT_FILE" || exit 1
echo "$(date -Iseconds) storcli metrics updated successfully"
|
||||||
Loading…
Reference in New Issue
Block a user