olimp/roles/pve_monitoring/tasks/main.yml
2025-11-18 05:45:41 +00:00

186 lines
5.0 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

---
# ========== 1. Install storcli (if it is not present yet) ==========
# Probe for the storcli64 binary; the result gates the download and install
# tasks through `storcli_installed.stat.exists`.
- name: Check if storcli is already installed
  become: true
  ansible.builtin.stat:
    path: /opt/MegaRAID/storcli/storcli64
  register: storcli_installed
# Fetch the storcli .deb into /tmp; skipped when the binary already exists.
- name: Download storcli
  become: true
  when: not storcli_installed.stat.exists
  ansible.builtin.get_url:
    dest: /tmp/storcli.deb
    mode: '0644'
    url: https://docs.broadcom.com/docs-and-downloads/raid-controllers/raid-controllers-common-files/storcli_1.24.02-1_all.deb
# Install the downloaded package file; skipped when the binary already exists.
- name: Install storcli
  become: true
  when: not storcli_installed.stat.exists
  ansible.builtin.apt:
    state: present
    deb: /tmp/storcli.deb
# Smoke test: run `storcli64 /c0 show` and fail the play on a non-zero exit.
# The command only reads state, so it is never reported as a change.
- name: Verify storcli works
  become: true
  ansible.builtin.command:
    argv:
      - /opt/MegaRAID/storcli/storcli64
      - /c0
      - show
  register: storcli_test
  failed_when: storcli_test.rc != 0
  changed_when: false
# ========== 2. Set up pve_exporter (Python) ==========
# Interpreter, pip and the venv module used by the virtual-environment tasks.
- name: Install Python dependencies
  become: true
  ansible.builtin.apt:
    state: present
    name:
      - python3
      - python3-pip
      - python3-venv
# System account the exporter runs as: no login shell, no home directory.
- name: Create pve_exporter user
  become: true
  ansible.builtin.user:
    name: pve_exporter
    shell: /usr/sbin/nologin
    system: true
    create_home: false
# Install root and config directory, both owned by the service account.
- name: Create pve_exporter directories
  become: true
  ansible.builtin.file:
    state: directory
    path: "{{ item }}"
    mode: '0755'
    owner: pve_exporter
    group: pve_exporter
  loop:
    - /opt/pve_exporter
    - /opt/pve_exporter/config
# Install sudo so that `become` works correctly in the tasks that follow.
- name: Ensure sudo is installed
  become: true
  ansible.builtin.apt:
    state: present
    name: sudo
# Build the venv as the service account. `creates` makes the task a no-op
# once the venv's python binary exists.
- name: Create Python virtual environment
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter
  ansible.builtin.command:
    argv:
      - python3
      - -m
      - venv
      - /opt/pve_exporter/venv
    creates: /opt/pve_exporter/venv/bin/python
# Upgrade pip inside the venv, running as the service account.
# A bare `command` is reported as "changed" on every run; pip prints
# "Successfully installed ..." only when it actually installed something,
# so that marker drives the changed status and keeps reruns idempotent.
- name: Upgrade pip in virtual environment
  ansible.builtin.command:
    cmd: /opt/pve_exporter/venv/bin/pip install --upgrade pip
    chdir: /opt/pve_exporter
  register: pip_upgrade
  changed_when: "'Successfully installed' in pip_upgrade.stdout"
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter
# Install the exporter into the venv, running as the service account.
# pip prints "Successfully installed ..." only when it installed something;
# using that marker for `changed_when` stops the task from reporting a
# change on every run when the package is already present.
- name: Install prometheus-pve-exporter package
  ansible.builtin.command:
    cmd: /opt/pve_exporter/venv/bin/pip install prometheus-pve-exporter
    chdir: /opt/pve_exporter
  register: pve_exporter_pip_install
  changed_when: "'Successfully installed' in pve_exporter_pip_install.stdout"
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter
# Render the exporter configuration from the role template.
# Mode 0600: readable and writable by the service account only.
- name: Deploy pve_exporter config
  become: true
  ansible.builtin.template:
    dest: /opt/pve_exporter/config/config.yml
    src: pve_exporter_config.yml.j2
    mode: '0600'
    owner: pve_exporter
    group: pve_exporter
# The task that used --test was REMOVED: that flag is not supported.
# The config is validated instead by actually starting the service.
# The unit runs the venv's pve_exporter binary as the pve_exporter user,
# listening on 0.0.0.0:9223, and restarts it 10 s after any exit.
- name: Create pve_exporter systemd service
  become: true
  ansible.builtin.copy:
    dest: /etc/systemd/system/pve_exporter.service
    mode: '0644'
    content: |
      [Unit]
      Description=Proxmox VE Exporter
      After=network.target
      [Service]
      Type=simple
      User=pve_exporter
      WorkingDirectory=/opt/pve_exporter
      Environment="HOME=/opt/pve_exporter"
      ExecStart=/opt/pve_exporter/venv/bin/pve_exporter \
      --server 0.0.0.0 \
      --port 9223 \
      --config /opt/pve_exporter/config/config.yml
      Restart=always
      RestartSec=10
      StandardOutput=journal
      StandardError=journal
      SyslogIdentifier=pve_exporter
      [Install]
      WantedBy=multi-user.target
# Reload unit files, enable the service at boot and make sure it is running.
- name: Reload systemd and start pve_exporter
  become: true
  ansible.builtin.systemd:
    daemon_reload: true
    name: pve_exporter
    state: started
    enabled: true
  register: service_start
  # Do not fail right away; the service status is checked by later tasks.
  failed_when: false
# Wait for the service to come up by probing its port (first probe after 5 s,
# give up after 60 s). The outcome is stored in `port_check` for later tasks.
# `ignore_errors` is used instead of `failed_when: false` on purpose:
# `failed_when: false` overwrites the registered `failed` flag with false, so
# conditions on `port_check.failed` could never become true. With
# `ignore_errors` the play continues and the flag stays truthful.
- name: Wait for pve_exporter to initialize
  ansible.builtin.wait_for:
    host: localhost
    port: 9223
    timeout: 60
    state: started
    delay: 5
  register: port_check
  ignore_errors: true
  become: true
# If the port is unreachable, collect the last 100 journal lines of the unit
# for debugging. Collection is best effort and never fails the play itself.
- name: Show pve_exporter logs if failed
  ansible.builtin.command: journalctl -u pve_exporter -n 100 --no-pager
  register: service_logs
  changed_when: false
  when: port_check is failed
  become: true
  failed_when: false

# Registering the output alone does not display it; print the captured lines
# so they are visible in the normal play output.
- name: Print collected pve_exporter logs
  ansible.builtin.debug:
    var: service_logs.stdout_lines | default([])
  when: port_check is failed
# Final health check: the metrics endpoint must answer with HTTP 200.
# Runs only when the port probe did not fail.
- name: Verify exporter is responding
  become: true
  when: not port_check.failed
  ansible.builtin.uri:
    url: http://localhost:9223/metrics
    timeout: 10
    status_code: 200
  register: metrics_check
  failed_when: metrics_check.status != 200
# Abort the play with troubleshooting hints when the port probe failed.
- name: Fail with detailed error if pve_exporter not started
  when: port_check.failed
  ansible.builtin.fail:
    msg: |
      pve_exporter failed to start. Check logs above.
      Common causes:
      1. Incorrect API token in config.yml
      2. Missing permissions for pve_exporter user
      3. Port 9223 is already in use
      4. Proxmox API is not accessible