olimp/roles/pve_monitoring/tasks/main.yml
2025-11-18 07:06:44 +00:00

176 lines
4.5 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

---
# ========== 1. Install storcli (if it is missing) ==========
# Probe for the storcli64 binary; the result gates the download and install
# tasks via storcli_installed.stat.exists.
- name: Check if storcli is already installed
  become: true
  stat:
    path: /opt/MegaRAID/storcli/storcli64
  register: storcli_installed
# Fetch the storcli .deb into /tmp, only when the binary was not found.
# NOTE(review): no checksum is supplied to get_url, so the downloaded package
# is not integrity-checked - consider adding one.
- name: Download storcli
  become: true
  when: not storcli_installed.stat.exists
  get_url:
    dest: /tmp/storcli.deb
    url: https://docs.broadcom.com/docs-and-downloads/raid-controllers/raid-controllers-common-files/storcli_1.24.02-1_all.deb
    mode: '0644'
# Install the previously downloaded package; skipped when storcli64 already
# exists on the host.
- name: Install storcli
  become: true
  when: not storcli_installed.stat.exists
  apt:
    state: present
    deb: /tmp/storcli.deb
# Smoke test: query controller /c0. This is a read-only call, so it never
# reports "changed"; a non-zero exit code fails the play.
- name: Verify storcli works
  become: true
  command:
    argv:
      - /opt/MegaRAID/storcli/storcli64
      - /c0
      - show
  register: storcli_test
  failed_when: storcli_test.rc != 0
  changed_when: false
# ========== 2. Set up pve_exporter (Python) ==========
# System packages needed to build the exporter's virtual environment.
- name: Install Python dependencies
  become: true
  apt:
    state: present
    name:
      - python3
      - python3-pip
      - python3-venv
# Dedicated system account for the exporter: no login shell, no home
# directory created.
- name: Create pve_exporter user
  become: true
  user:
    name: pve_exporter
    shell: /usr/sbin/nologin
    system: true
    create_home: false
# Installation root and config directory, both owned by the service account.
- name: Create pve_exporter directories
  become: true
  file:
    path: "{{ exporter_dir }}"
    state: directory
    mode: '0755'
    owner: pve_exporter
    group: pve_exporter
  loop:
    - /opt/pve_exporter
    - /opt/pve_exporter/config
  loop_control:
    loop_var: exporter_dir
# The tasks below use become_user: pve_exporter.
# NOTE(review): presumably sudo is installed here because it is the become
# method in use - confirm against the inventory/ansible.cfg.
- name: Ensure sudo is installed
  become: true
  apt:
    state: present
    name: sudo
# Build the venv as the service account. "creates" lets Ansible skip the
# command once the venv's python binary exists.
- name: Create Python virtual environment
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter
  command:
    argv:
      - python3
      - -m
      - venv
      - /opt/pve_exporter/venv
    creates: /opt/pve_exporter/venv/bin/python
# Upgrade pip inside the venv as the service account.
# A bare "command" task reports "changed" on every run; here the change status
# is derived from pip's own output, which prints "Successfully installed ..."
# only when it actually installed a newer pip.
- name: Upgrade pip in virtual environment
  command:
    cmd: /opt/pve_exporter/venv/bin/pip install --upgrade pip
    chdir: /opt/pve_exporter
  register: pip_upgrade
  changed_when: "'Successfully installed' in pip_upgrade.stdout"
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter
# Install the exporter into the venv as the service account.
# The change status is derived from pip's output so that re-runs with the
# package already present report "ok" instead of "changed".
# NOTE(review): the package version is not pinned, so the installed release
# can differ between hosts and runs.
- name: Install prometheus-pve-exporter package
  command:
    cmd: /opt/pve_exporter/venv/bin/pip install prometheus-pve-exporter
    chdir: /opt/pve_exporter
  register: pve_exporter_pip
  changed_when: "'Successfully installed' in pve_exporter_pip.stdout"
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter
# Render the exporter configuration; mode 0600 keeps it readable by the
# service account only.
# NOTE(review): this task has no "notify", so a changed config does not by
# itself restart a running exporter.
- name: Deploy pve_exporter config
  become: true
  template:
    dest: /opt/pve_exporter/config/config.yml
    src: pve_exporter_config.yml.j2
    mode: '0600'
    owner: pve_exporter
    group: pve_exporter
# The former "--test" check was removed: the exporter does not support it.
# Install the unit file and record whether it changed, so the next task can
# restart the service when needed.
# NOTE(review): confirm that --server/--port/--config match the command line
# of the installed prometheus-pve-exporter release.
- name: Create pve_exporter systemd service
  copy:
    content: |
      [Unit]
      Description=Proxmox VE Exporter
      After=network.target
      [Service]
      Type=simple
      User=pve_exporter
      WorkingDirectory=/opt/pve_exporter
      Environment="HOME=/opt/pve_exporter"
      ExecStart=/opt/pve_exporter/venv/bin/pve_exporter \
      --server 0.0.0.0 \
      --port 9223 \
      --config /opt/pve_exporter/config/config.yml
      Restart=always
      RestartSec=10
      StandardOutput=journal
      StandardError=journal
      SyslogIdentifier=pve_exporter
      [Install]
      WantedBy=multi-user.target
    dest: /etc/systemd/system/pve_exporter.service
    mode: '0644'
  register: pve_exporter_unit
  become: true

# Reload unit definitions, enable the service at boot and make sure it runs.
# "state: started" alone leaves an already-running service on the old unit
# definition, so a restart is requested whenever the unit file was rewritten.
- name: Reload systemd and start pve_exporter
  systemd:
    name: pve_exporter
    enabled: true
    daemon_reload: true
    state: "{{ 'restarted' if pve_exporter_unit is changed else 'started' }}"
  become: true
# Fixed 10-second delay before the health checks that follow.
- name: Wait for service to initialize
  become: true
  pause:
    seconds: 10
# Check the service status.
# "systemctl status" exits non-zero when the unit is inactive or failed.
# Without failed_when: false the play would stop here, and the log task that
# keys off "Active: failed" / "Active: inactive" could never run.
- name: Check pve_exporter service status
  command: systemctl status pve_exporter
  register: service_status
  changed_when: false
  failed_when: false
  become: true
# If the service is not active, collect its recent journal entries.
- name: Show pve_exporter logs
  command: journalctl -u pve_exporter --since "5 minutes ago" --no-pager
  register: service_logs
  changed_when: false
  when: "'Active: failed' in service_status.stdout or 'Active: inactive' in service_status.stdout"
  become: true

# Registering the output alone does not show it in a normal run; print it.
# When the task above was skipped, stdout_lines is undefined and this task is
# skipped as well.
- name: Print pve_exporter logs
  debug:
    var: service_logs.stdout_lines
  when: service_logs.stdout_lines is defined
# Check that something is listening on the exporter port.
# The command module does not run a shell, so a "| grep" pipeline would be
# passed to ss as literal arguments. Run ss alone and match the port in
# failed_when; the task still fails when nothing listens on :9223.
# port_status.stdout now holds the full listener table, not one grep line.
- name: Check listening ports
  command: ss -tlnp
  register: port_status
  changed_when: false
  failed_when: "port_status.rc != 0 or ':9223' not in port_status.stdout"
  become: true
# Final check: the exporter's HTTP endpoint must answer with 200 within the
# 10-second timeout.
- name: Verify exporter is responding
  become: true
  uri:
    url: http://localhost:9223/metrics
    timeout: 10
    status_code: 200
  register: metrics_check
  failed_when: metrics_check.status != 200