olimp/roles/proxmox_monitoring/tasks/main.yml
Administrator a948ee74a8 Update 12 files
- /roles/proxmox_monitoring/handlers/main.yml
- /roles/proxmox_monitoring/tasks/main.yml
- /roles/proxmox_monitoring/templates/node_exporter.service.j2
- /roles/proxmox_monitoring/templates/storcli_metrics.sh.j2
- /roles/proxmox_monitoring/templates/pve_exporter_config.yml.j2
- /roles/proxmox_base_setup/tasks/main.yml
- /roles/grafana/templates/docker-compose.yml.j2
- /roles/grafana/files/vmagent.yaml
- /roles/base_setup/tasks/main.yml
- /roles/base_setup/handlers/main.yml
- /group_vars/all.yml
- /olimp-deploy.yml
2025-11-18 19:57:51 +00:00

306 lines
7.8 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

---
# ========== 0. Base packages ==========
# Refresh the apt index; cache_valid_time (seconds) skips the refresh when
# the cache is younger than one day.
- name: Update package cache
  become: true
  apt:
    update_cache: true
    cache_valid_time: 86400

# Packages installed up front for the monitoring tasks in this file
# (python3-venv is needed for the `python3 -m venv` task further down).
- name: Install monitoring dependencies
  become: true
  apt:
    state: present
    name:
      - python3
      - python3-pip
      - python3-venv
      - curl
      - wget
      - jq
      - smartmontools
# ========== 1. Install storcli (with availability check) ==========
# The .deb is downloaded and installed only when the storcli64 binary is not
# already present on the host.
- name: Check if storcli is already installed
  stat:
    path: /opt/MegaRAID/storcli/storcli64
  register: storcli_installed
  become: true

- name: Download storcli
  get_url:
    url: https://docs.broadcom.com/docs-and-downloads/raid-controllers/raid-controllers-common-files/storcli_1.24.02-1_all.deb
    dest: /tmp/storcli.deb
    mode: '0644'
  when: not storcli_installed.stat.exists
  become: true

- name: Install storcli
  apt:
    deb: /tmp/storcli.deb
    state: present
  when: not storcli_installed.stat.exists
  become: true

# Unconditional: the link is wanted whether storcli was just installed or was
# already there, so no `when` is needed.
- name: Create storcli symlink
  file:
    src: /opt/MegaRAID/storcli/storcli64
    dest: /usr/local/bin/storcli
    state: link
    force: true
  become: true

# Probe controller 0. This is a capability check, not a requirement: a
# non-zero exit must not abort the play, because every storcli-dependent task
# below is already guarded by `storcli_available | default(false)`.
- name: Verify storcli works
  command: /opt/MegaRAID/storcli/storcli64 /c0 show
  register: storcli_test
  changed_when: false
  failed_when: false
  become: true

# Only set on success; when the probe fails the fact stays undefined and the
# `default(false)` guards skip the storcli tasks. `default(1)` covers runs
# where the probe was skipped (e.g. check mode) and `rc` is absent.
- name: Set fact — storcli is available
  set_fact:
    storcli_available: true
  when: (storcli_test.rc | default(1)) == 0
# ========== 2. Node Exporter ==========
# All steps run with privilege escalation, so `become` is declared once on
# the block and inherited by every task inside it.
- name: Install and configure node_exporter
  become: true
  block:
    # Dedicated system account without a login shell or home directory.
    - name: Create node_exporter user
      user:
        name: node_exporter
        system: true
        create_home: false
        shell: /usr/sbin/nologin

    # The tarball is verified against the release's published sha256sums.txt.
    - name: Download node_exporter
      get_url:
        url: "https://github.com/prometheus/node_exporter/releases/download/v1.8.2/node_exporter-1.8.2.linux-amd64.tar.gz"
        dest: /tmp/node_exporter-1.8.2.linux-amd64.tar.gz
        checksum: "sha256:https://github.com/prometheus/node_exporter/releases/download/v1.8.2/sha256sums.txt"
        mode: '0644'
        timeout: 60

    # `creates` makes the extraction a no-op once the binary is unpacked.
    - name: Extract node_exporter
      unarchive:
        remote_src: true
        src: /tmp/node_exporter-1.8.2.linux-amd64.tar.gz
        dest: /tmp/
        creates: /tmp/node_exporter-1.8.2.linux-amd64/node_exporter

    # Copy on the remote host itself (remote_src); a changed binary triggers
    # a service restart via the handler.
    - name: Install node_exporter binary
      copy:
        remote_src: true
        src: /tmp/node_exporter-1.8.2.linux-amd64/node_exporter
        dest: /usr/local/bin/node_exporter
        owner: root
        group: root
        mode: '0755'
      notify: restart node_exporter

    - name: Create textfile_collector directory
      file:
        state: directory
        path: /var/lib/node_exporter/textfile_collector
        owner: node_exporter
        group: node_exporter
        mode: '0755'

    - name: Deploy node_exporter systemd service
      template:
        src: node_exporter.service.j2
        dest: /etc/systemd/system/node_exporter.service
        owner: root
        group: root
        mode: '0644'
      notify: restart node_exporter
# ========== 3. PVE Exporter ==========
# Dedicated system account without a login shell or home directory.
- name: Create pve_exporter user
  user:
    name: pve_exporter
    system: true
    shell: /usr/sbin/nologin
    create_home: false
  become: true

- name: Create pve_exporter directories
  file:
    path: "{{ item }}"
    state: directory
    owner: pve_exporter
    group: pve_exporter
    mode: '0755'
  loop:
    - /opt/pve_exporter
    - /opt/pve_exporter/config
  become: true

# Created as the service user so the venv is owned by pve_exporter.
# HOME is set explicitly because the account has no home directory.
- name: Create Python virtual environment
  command:
    cmd: python3 -m venv /opt/pve_exporter/venv
    creates: /opt/pve_exporter/venv/bin/python
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter

# The pip module (instead of a raw `pip install` command) reports "changed"
# only when something was actually installed or upgraded.
- name: Upgrade pip in virtual environment
  pip:
    name: pip
    state: latest
    virtualenv: /opt/pve_exporter/venv
    chdir: /opt/pve_exporter
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter

- name: Install prometheus-pve-exporter package
  pip:
    name: prometheus-pve-exporter
    state: present
    virtualenv: /opt/pve_exporter/venv
    chdir: /opt/pve_exporter
  become: true
  become_user: pve_exporter
  environment:
    HOME: /opt/pve_exporter
  notify: restart pve_exporter

# Mode 0600: the config is readable by the service user only.
# A changed config restarts the service so the running process picks it up.
- name: Deploy pve_exporter config
  template:
    src: pve_exporter_config.yml.j2
    dest: /opt/pve_exporter/config/config.yml
    owner: pve_exporter
    group: pve_exporter
    mode: '0600'
  become: true
  notify: restart pve_exporter

# Unit file is written inline; the exporter listens on port 9223 and reads
# the config deployed above.
- name: Create pve_exporter systemd service
  copy:
    content: |
      [Unit]
      Description=Proxmox VE Exporter
      After=network.target
      [Service]
      Type=simple
      User=pve_exporter
      WorkingDirectory=/opt/pve_exporter
      Environment="HOME=/opt/pve_exporter"
      ExecStart=/opt/pve_exporter/venv/bin/pve_exporter \
      --web.listen-address=0.0.0.0:9223 \
      --config.file=/opt/pve_exporter/config/config.yml
      Restart=always
      RestartSec=10
      StandardOutput=journal
      StandardError=journal
      SyslogIdentifier=pve_exporter
      [Install]
      WantedBy=multi-user.target
    dest: /etc/systemd/system/pve_exporter.service
    mode: '0644'
  become: true
  notify: restart pve_exporter
# ========== 4. StorCLI Metrics ==========
# Everything in this section is skipped unless the storcli probe succeeded
# (storcli_available is only set to true in that case).

# The script's target directory is not created anywhere else in this file,
# so make sure it exists before templating into it.
- name: Create scripts directory
  file:
    path: /opt/scripts
    state: directory
    owner: root
    group: root
    mode: '0755'
  when: storcli_available | default(false)
  become: true

- name: Deploy storcli metrics script
  template:
    src: storcli_metrics.sh.j2
    dest: /opt/scripts/storcli_metrics.sh
    owner: root
    group: root
    mode: '0755'
  when: storcli_available | default(false)
  become: true

# `when`, `become` and `notify` are declared on the block and inherited by
# each task inside it.
- name: Deploy storcli_metrics systemd units
  block:
    # Oneshot service: a single run of the collector script as root.
    - name: Create storcli_metrics.service
      copy:
        content: |
          [Unit]
          Description=Collect RAID/disk metrics via storcli
          After=network.target
          [Service]
          Type=oneshot
          ExecStart=/opt/scripts/storcli_metrics.sh
          User=root
          StandardOutput=journal
          StandardError=journal
        dest: /etc/systemd/system/storcli_metrics.service
        owner: root
        group: root
        mode: '0644'

    # Timer: first run 60 s after boot, then 5 minutes after each activation.
    - name: Create storcli_metrics.timer (every 5 min)
      copy:
        content: |
          [Unit]
          Description=Run storcli metrics collector every 5 minutes
          Requires=storcli_metrics.service
          [Timer]
          OnBootSec=60
          OnUnitActiveSec=5m
          AccuracySec=1s
          [Install]
          WantedBy=timers.target
        dest: /etc/systemd/system/storcli_metrics.timer
        owner: root
        group: root
        mode: '0644'

    # daemon_reload makes systemd pick up the unit files written above.
    - name: Enable & start storcli_metrics.timer
      systemd:
        name: storcli_metrics.timer
        state: started
        enabled: true
        daemon_reload: true
  when: storcli_available | default(false)
  become: true
  notify: restart storcli_metrics
# ========== 5. Start and verify services ==========
# daemon_reload makes systemd pick up the unit files deployed earlier.
- name: Start and enable all services
  become: true
  systemd:
    name: "{{ item }}"
    enabled: true
    state: started
    daemon_reload: true
  loop:
    - node_exporter
    - pve_exporter

# Best-effort wait (never fails the play); the HTTP check in the next task
# is the one that decides success.
- name: Wait for services to initialize
  become: true
  wait_for:
    host: localhost
    port: "{{ item.port }}"
    state: started
    delay: 5
    timeout: 30
  loop:
    - port: 9100
      service: "node_exporter"
    - port: 9223
      service: "pve_exporter"
  failed_when: false

# Each exporter must answer HTTP 200 on /metrics.
- name: Verify services are responding
  become: true
  uri:
    url: "http://localhost:{{ item.port }}/metrics"
    status_code: 200
    timeout: 10
  loop:
    - port: 9100
      service: "node_exporter"
    - port: 9223
      service: "pve_exporter"
  register: service_checks

# Print one "<service> - <http status>" line per checked exporter.
- name: Show service status
  debug:
    msg: "{{ item.item.service }} - {{ item.status }}"
  loop: "{{ service_checks.results }}"
  loop_control:
    label: "{{ item.item.service }}"

# Run the collector once now; only when the storcli probe succeeded.
- name: Run initial storcli metrics collection
  become: true
  command: /opt/scripts/storcli_metrics.sh
  changed_when: false
  when: storcli_available | default(false)