---
# Tasks that install and start the monitoring stack on a host:
#   1. storcli (MegaRAID CLI), with a probe that records whether it works
#   2. Prometheus node_exporter
#   3. prometheus-pve-exporter inside a dedicated virtualenv
#   4. a storcli metrics script driven by a systemd timer (only when the
#      storcli probe succeeded)
#   5. service start-up and /metrics verification
#
# Handlers notified here (restart node_exporter, restart pve_exporter,
# restart storcli_metrics) and the referenced templates are defined
# outside this file.

- name: Update package cache
  apt:
    update_cache: true
    cache_valid_time: 86400
  become: true

- name: Install monitoring dependencies
  apt:
    name:
      - python3
      - python3-pip
      - python3-venv
      - curl
      - wget
      - jq
      - smartmontools
    state: present
  become: true

# ========== 1. Install storcli with verification ==========

- name: Check if storcli is already installed
  stat:
    path: /opt/MegaRAID/storcli/storcli64
  register: storcli_installed
  become: true

# NOTE(review): this download is not checksum-verified; consider pinning a
# sha256 via the get_url `checksum` parameter once the value is known.
- name: Download storcli
  get_url:
    url: https://docs.broadcom.com/docs-and-downloads/raid-controllers/raid-controllers-common-files/storcli_1.24.02-1_all.deb
    dest: /tmp/storcli.deb
    mode: '0644'
  when: not storcli_installed.stat.exists
  become: true

- name: Install storcli
  apt:
    deb: /tmp/storcli.deb
    state: present
  when: not storcli_installed.stat.exists
  become: true

# Runs unconditionally: by this point the binary was either already present
# or has just been installed by the task above.
- name: Create storcli symlink
  file:
    src: /opt/MegaRAID/storcli/storcli64
    dest: /usr/local/bin/storcli
    state: link
    force: true
  become: true

# Probe only: a non-zero exit must not abort the play, it just leaves
# storcli_available unset so the storcli metrics tasks are skipped.
- name: Verify storcli works
  command: /opt/MegaRAID/storcli/storcli64 /c0 show
  register: storcli_test
  changed_when: false
  failed_when: false
  become: true

# default(1) covers runs where the probe was skipped (e.g. check mode) and
# therefore registered no rc.
- name: Set fact — storcli is available
  set_fact:
    storcli_available: true
  when: (storcli_test.rc | default(1)) == 0

# ========== 2. Node Exporter ==========

- name: Create node_exporter user
  user:
    name: node_exporter
    system: true
    shell: /usr/sbin/nologin
    create_home: false
  become: true

- name: Download node_exporter
  get_url:
    url: "https://github.com/prometheus/node_exporter/releases/download/v1.8.2/node_exporter-1.8.2.linux-amd64.tar.gz"
    dest: /tmp/node_exporter-1.8.2.linux-amd64.tar.gz
    checksum: "sha256:https://github.com/prometheus/node_exporter/releases/download/v1.8.2/sha256sums.txt"
    mode: '0644'
    timeout: 60
  become: true

- name: Extract node_exporter
  unarchive:
    src: /tmp/node_exporter-1.8.2.linux-amd64.tar.gz
    dest: /tmp/
    remote_src: true
    creates: /tmp/node_exporter-1.8.2.linux-amd64/node_exporter
  become: true

- name: Install node_exporter binary
  copy:
    src: /tmp/node_exporter-1.8.2.linux-amd64/node_exporter
    dest: /usr/local/bin/node_exporter
    owner: root
    group: root
    mode: '0755'
    remote_src: true
  become: true
  notify: restart node_exporter

- name: Create textfile_collector directory
  file:
    path: /var/lib/node_exporter/textfile_collector
    state: directory
    owner: node_exporter
    group: node_exporter
    mode: '0755'
  become: true

- name: Deploy node_exporter systemd service
  template:
    src: node_exporter.service.j2
    dest: /etc/systemd/system/node_exporter.service
    owner: root
    group: root
    mode: '0644'
  become: true
  notify: restart node_exporter

# ========== 3. PVE Exporter ==========

- name: Create pve_exporter user
  user:
    name: pve_exporter
    system: true
    shell: /usr/sbin/nologin
    create_home: false
  become: true

- name: Create pve_exporter directories
  file:
    path: "{{ item }}"
    state: directory
    owner: pve_exporter
    group: pve_exporter
    mode: '0755'
  loop:
    - /opt/pve_exporter
    - /opt/pve_exporter/config
  become: true

# The user has no home directory (create_home: false), so HOME is pointed
# at /opt/pve_exporter for the venv and pip tasks below.
- name: Create Python virtual environment
  command:
    cmd: python3 -m venv /opt/pve_exporter/venv
    creates: /opt/pve_exporter/venv/bin/python
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter

# pip prints "Successfully installed ..." only when it actually installed
# something; use that to report "changed" accurately on re-runs.
- name: Upgrade pip in virtual environment
  command:
    cmd: /opt/pve_exporter/venv/bin/pip install --upgrade pip
    chdir: /opt/pve_exporter
  register: pve_pip_upgrade
  changed_when: "'Successfully installed' in pve_pip_upgrade.stdout"
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter

- name: Install prometheus-pve-exporter package
  command:
    cmd: /opt/pve_exporter/venv/bin/pip install prometheus-pve-exporter
    chdir: /opt/pve_exporter
  register: pve_exporter_install
  changed_when: "'Successfully installed' in pve_exporter_install.stdout"
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter
  notify: restart pve_exporter

# Mode 0600: readable by the pve_exporter user only.
- name: Deploy pve_exporter config
  template:
    src: pve_exporter_config.yml.j2
    dest: /opt/pve_exporter/config/config.yml
    owner: pve_exporter
    group: pve_exporter
    mode: '0600'
  become: true

- name: Create pve_exporter systemd service
  copy:
    content: |
      [Unit]
      Description=Proxmox VE Exporter
      After=network.target

      [Service]
      Type=simple
      User=pve_exporter
      WorkingDirectory=/opt/pve_exporter
      Environment="HOME=/opt/pve_exporter"
      ExecStart=/opt/pve_exporter/venv/bin/pve_exporter \
        --web.listen-address=0.0.0.0:9223 \
        --config.file=/opt/pve_exporter/config/config.yml
      Restart=always
      RestartSec=10
      StandardOutput=journal
      StandardError=journal
      SyslogIdentifier=pve_exporter

      [Install]
      WantedBy=multi-user.target
    dest: /etc/systemd/system/pve_exporter.service
    mode: '0644'
  become: true
  notify: restart pve_exporter

# ========== 4. StorCLI Metrics ==========
# Everything in this section is skipped unless the storcli probe in
# section 1 set storcli_available.

# The template task below does not create its parent directory.
- name: Create scripts directory
  file:
    path: /opt/scripts
    state: directory
    owner: root
    group: root
    mode: '0755'
  when: storcli_available | default(false)
  become: true

- name: Deploy storcli metrics script
  template:
    src: storcli_metrics.sh.j2
    dest: /opt/scripts/storcli_metrics.sh
    owner: root
    group: root
    mode: '0755'
  when: storcli_available | default(false)
  become: true

# when/become/notify are set on the block and inherited by its tasks.
- name: Deploy storcli_metrics systemd units
  block:
    - name: Create storcli_metrics.service
      copy:
        content: |
          [Unit]
          Description=Collect RAID/disk metrics via storcli
          After=network.target

          [Service]
          Type=oneshot
          ExecStart=/opt/scripts/storcli_metrics.sh
          User=root
          StandardOutput=journal
          StandardError=journal
        dest: /etc/systemd/system/storcli_metrics.service
        owner: root
        mode: '0644'

    - name: Create storcli_metrics.timer (every 5 min)
      copy:
        content: |
          [Unit]
          Description=Run storcli metrics collector every 5 minutes
          Requires=storcli_metrics.service

          [Timer]
          OnBootSec=60
          OnUnitActiveSec=5m
          AccuracySec=1s

          [Install]
          WantedBy=timers.target
        dest: /etc/systemd/system/storcli_metrics.timer
        owner: root
        mode: '0644'

    - name: Enable & start storcli_metrics.timer
      systemd:
        name: storcli_metrics.timer
        state: started
        enabled: true
        daemon_reload: true
  when: storcli_available | default(false)
  become: true
  notify: restart storcli_metrics

# ========== 5. Start and verify services ==========

- name: Start and enable all services
  systemd:
    name: "{{ item }}"
    state: started
    enabled: true
    daemon_reload: true
  loop:
    - node_exporter
    - pve_exporter
  become: true

# Best-effort wait (failed_when: false); the uri task below is what
# actually fails the play if a service is not answering.
- name: Wait for services to initialize
  wait_for:
    host: localhost
    port: "{{ item.port }}"
    timeout: 30
    state: started
    delay: 5
  loop:
    - port: 9100
      service: "node_exporter"
    - port: 9223
      service: "pve_exporter"
  become: true
  failed_when: false

- name: Verify services are responding
  uri:
    url: "http://localhost:{{ item.port }}/metrics"
    status_code: 200
    timeout: 10
  loop:
    - port: 9100
      service: "node_exporter"
    - port: 9223
      service: "pve_exporter"
  register: service_checks
  become: true

- name: Show service status
  debug:
    msg: "{{ item.item.service }} - {{ item.status }}"
  loop: "{{ service_checks.results }}"
  loop_control:
    label: "{{ item.item.service }}"

# Run the metrics script once now rather than waiting for the timer.
- name: Run initial storcli metrics collection
  command: /opt/scripts/storcli_metrics.sh
  when: storcli_available | default(false)
  become: true
  changed_when: false