diff --git a/roles/pve_monitoring/tasks/main.yml b/roles/pve_monitoring/tasks/main.yml
index cae8881..716032a 100644
--- a/roles/pve_monitoring/tasks/main.yml
+++ b/roles/pve_monitoring/tasks/main.yml
@@ -66,12 +66,21 @@
     - /opt/pve_exporter/config
   become: yes
 
+# Install sudo so that become works
+- name: Ensure sudo is installed
+  apt:
+    name: sudo
+    state: present
+  become: yes
+
 - name: Create Python virtual environment
   command:
     cmd: python3 -m venv /opt/pve_exporter/venv
     creates: /opt/pve_exporter/venv/bin/python
   become: yes
   become_user: pve_exporter
+  environment:
+    HOME: /opt/pve_exporter
 
 - name: Upgrade pip in virtual environment
   command:
@@ -79,6 +88,8 @@
     chdir: /opt/pve_exporter
   become: yes
   become_user: pve_exporter
+  environment:
+    HOME: /opt/pve_exporter
 
 - name: Install prometheus-pve-exporter package
   command:
@@ -89,7 +100,7 @@
   environment:
     HOME: /opt/pve_exporter
 
-- name: Deploy pve_exporter config
+- name: Deploy pve_exporter config (with vault secrets)
   template:
     src: pve_exporter_config.yml.j2
     dest: /opt/pve_exporter/config/config.yml
@@ -98,6 +109,19 @@
     mode: '0600'
   become: yes
 
+# Best-effort check of the rendered config before the service is started.
+# NOTE(review): confirm the installed pve_exporter supports --test; errors
+# are ignored, so this task can never block the deployment.
+- name: Verify pve_exporter config syntax
+  command: /opt/pve_exporter/venv/bin/pve_exporter --config /opt/pve_exporter/config/config.yml --test
+  become: yes
+  become_user: pve_exporter
+  environment:
+    HOME: /opt/pve_exporter
+  register: config_test
+  changed_when: false
+  ignore_errors: yes
+
 - name: Create pve_exporter systemd service
   copy:
     content: |
@@ -115,6 +139,7 @@
         --config /opt/pve_exporter/config/config.yml
       Restart=always
       RestartSec=10
+      Environment="HOME=/opt/pve_exporter"
 
       [Install]
       WantedBy=multi-user.target
@@ -122,13 +147,47 @@
     mode: '0644'
   become: yes
 
-- name: Enable and start pve_exporter
+- name: Reload systemd and start pve_exporter
   systemd:
     name: pve_exporter
     enabled: yes
     state: started
     daemon_reload: yes
   become: yes
+  register: service_start
+  # Do not abort yet: the journal is dumped below before the play fails.
+  ignore_errors: yes
+
+# Poll the exporter port (5-second initial delay, 30-second timeout).
+- name: Wait for pve_exporter to initialize
+  wait_for:
+    host: localhost
+    port: 9223
+    timeout: 30
+    state: started
+    delay: 5
+  register: port_check
+  # Keep the failed status in port_check so the tasks below can react to it.
+  ignore_errors: yes
+  become: yes
+
+# If the service or its port is unavailable, collect the logs for debugging.
+- name: Show pve_exporter logs if failed
+  command: journalctl -u pve_exporter -n 100 --no-pager
+  register: service_logs
+  changed_when: false
+  when: service_start is failed or port_check is failed
+  become: yes
+
+- name: Print pve_exporter logs
+  debug:
+    var: service_logs.stdout_lines
+  when: service_start is failed or port_check is failed
+
+- name: Fail if pve_exporter is not running
+  fail:
+    msg: "pve_exporter failed to start. Check logs above."
+  when: service_start is failed or port_check is failed
 
 # ========== 4. RAID monitoring via storcli + node_exporter textfile ==========
 - name: Ensure node_exporter textfile dir exists