summary refs log tree commit diff
path: root/playbooks/roles/mon
diff options
context:
space:
mode:
Diffstat (limited to 'playbooks/roles/mon')
-rw-r--r-- playbooks/roles/mon/templates/stacks/docker-compose.yml 29
-rw-r--r-- playbooks/roles/mon/templates/volumes/gatus/config/config.yml 45
-rw-r--r-- playbooks/roles/mon/templates/volumes/grafana/config.ini 28
-rw-r--r-- playbooks/roles/mon/templates/volumes/grafana/data/.gitkeep 0
-rw-r--r-- playbooks/roles/mon/templates/volumes/prometheus/config.yml 9
5 files changed, 95 insertions, 16 deletions
diff --git a/playbooks/roles/mon/templates/stacks/docker-compose.yml b/playbooks/roles/mon/templates/stacks/docker-compose.yml
index 31a5932..31fe0c1 100644
--- a/playbooks/roles/mon/templates/stacks/docker-compose.yml
+++ b/playbooks/roles/mon/templates/stacks/docker-compose.yml
@@ -47,11 +47,40 @@ services:
labels:
- traefik.enable=false
+ grafana:
+ image: grafana/grafana:latest
+ environment:
+ - TZ={{ timezone }}
+ - DEPLOYMENT_TIME={{ deployment_time }}
+ volumes:
+ - "{{ mon_base }}/volumes/grafana/data:/var/lib/grafana"  # Grafana state; holds the default SQLite DB unless [database] is overridden
+ - "{{ mon_base }}/volumes/grafana/config.ini:/etc/grafana/grafana.ini:ro"
+ networks:
+ - monint
+ - proxy
+ - metrics
+ deploy:
+ mode: replicated
+ replicas: 1
+ update_config:
+ parallelism: 1
+ order: stop-first  # stop the old task first so two instances never share the bind-mounted data dir
+ failure_action: rollback
+ labels:
+ - traefik.enable=true
+ - traefik.swarm.network=proxy
+ - traefik.http.routers.grafana.tls=true
+ - traefik.http.routers.grafana.tls.certResolver=letsencrypt
+ - traefik.http.routers.grafana.rule=Host(`{{ grafana_domain }}`)
+ - traefik.http.routers.grafana.entrypoints=websecure
+ - traefik.http.services.grafana.loadbalancer.server.port=3000
+
prometheus:
image: prom/prometheus:latest
volumes:
- "{{ mon_base }}/volumes/prometheus/config.yml:/etc/prometheus/prometheus.yml"
networks:
+ - monint
- proxy
- metrics
environment:
diff --git a/playbooks/roles/mon/templates/volumes/gatus/config/config.yml b/playbooks/roles/mon/templates/volumes/gatus/config/config.yml
index 403df4a..e5fcf73 100644
--- a/playbooks/roles/mon/templates/volumes/gatus/config/config.yml
+++ b/playbooks/roles/mon/templates/volumes/gatus/config/config.yml
@@ -19,13 +19,13 @@ endpoints:
- "[BODY] == pat(*OK*)"
alerts:
- type: ntfy
- failure-threshold: 1
+ failure-threshold: 3
send-on-resolved: true
description: "GlobalHealthCheck"
- name: "LDAPS"
url: "tls://{{ idm_domain }}:3636"
- interval: 5m
+ interval: 2m
client:
timeout: 5s
conditions:
@@ -33,10 +33,24 @@ endpoints:
- "[CERTIFICATE_EXPIRATION] > 48h"
alerts:
- type: ntfy
- failure-threshold: 1
+ failure-threshold: 3
send-on-resolved: true
description: "LDAPS"
+ - name: "ssh for git"
+ url: "tcp://src.{{ domain }}:2222"
+ interval: 2m
+ client:
+ timeout: 5s
+ conditions:
+ - "[CONNECTED] == true"
+ # No [CERTIFICATE_EXPIRATION] condition: SSH is not TLS, so this is a plain TCP connect probe.
+ alerts:
+ - type: ntfy
+ failure-threshold: 3
+ send-on-resolved: true
+ description: "ssh for git"
+
{% for test in email_tests %}
{% set from_account = (email_accounts | selectattr("email", "equalto", test.from) | list).0 %}
{% set to_account = (email_accounts | selectattr("email", "equalto", test.to) | list).0 %}
@@ -73,7 +87,7 @@ endpoints:
- "[BODY] == pat(*ok*)"
alerts:
- type: ntfy
- failure-threshold: 1
+ failure-threshold: 3
send-on-resolved: true
description: "mail {{ test.name }}"
{% endfor %}
@@ -82,7 +96,7 @@ endpoints:
- name: "mail on port {{ port }}"
group: "mail"
url: "tls://{{ mail_domain }}:{{ port }}"
- interval: 5m
+ interval: 2m
client:
timeout: 5s
conditions:
@@ -90,7 +104,7 @@ endpoints:
- "[CERTIFICATE_EXPIRATION] > 48h"
alerts:
- type: ntfy
- failure-threshold: 1
+ failure-threshold: 3
send-on-resolved: true
description: "mail on port {{ port }}"
{% endfor %}
@@ -100,13 +114,13 @@ endpoints:
- name: "healthcheck {{ user }} pub {{ healthcheck }} 200"
group: "{{ user }}_pub"
url: "{{ healthcheck }}"
- interval: 1m
+ interval: 2m
conditions:
- "[STATUS] == 200"
- "[CERTIFICATE_EXPIRATION] > 240h"
alerts:
- type: ntfy
- failure-threshold: 1
+ failure-threshold: 3
send-on-resolved: true
description: "healthcheck {{ user }} pub {{ healthcheck }} 200"
{% endfor %}
@@ -116,13 +130,13 @@ endpoints:
client:
dns-resolver: "tcp://{{ m.gateway }}:53"
group: "{{ user }}_priv"
- interval: 1m
+ interval: 2m
conditions:
- "[STATUS] == 200"
- "[CERTIFICATE_EXPIRATION] > 240h"
alerts:
- type: ntfy
- failure-threshold: 1
+ failure-threshold: 3
send-on-resolved: true
description: "{{ healthcheck }} priv healthcheck {{ user }}"
@@ -131,12 +145,12 @@ endpoints:
url: "{{ healthcheck }}"
client:
dns-resolver: "tcp://{{ public_resolver }}:53"
- interval: 1m
+ interval: 2m
conditions:
- "[STATUS] == 403"
alerts:
- type: ntfy
- failure-threshold: 1
+ failure-threshold: 3
send-on-resolved: true
description: "{{ healthcheck }} pub healthcheck {{ user }} 403"
{% endfor %}
@@ -144,7 +158,7 @@ endpoints:
- name: "DNS Check [{{ record.name }}_{{ record.type }}]"
group: "{{ user }}_dns_private"
url: "{{ m.gateway }}"
- interval: 5m
+ interval: 2m
dns:
query-name: "{{ record.name }}"
query-type: "{{ record.type }}"
@@ -153,18 +167,19 @@ endpoints:
- "[DNS_RCODE] == NOERROR"
alerts:
- type: ntfy
- failure-threshold: 1
+ failure-threshold: 3
send-on-resolved: true
description: "DNS {{ record.name }}_{{ record.type }}"
- name: "PING {{ record.name }}_{{ record.type }}"
group: "{{ user }}_dns_private"
url: "icmp://{{ record.name }}"
+ interval: 2m
conditions:
- "[CONNECTED] == true"
alerts:
- type: ntfy
- failure-threshold: 1
+ failure-threshold: 3
send-on-resolved: true
description: "PING {{ record.name }}"
{% endfor %}
diff --git a/playbooks/roles/mon/templates/volumes/grafana/config.ini b/playbooks/roles/mon/templates/volumes/grafana/config.ini
new file mode 100644
index 0000000..e0371ea
--- /dev/null
+++ b/playbooks/roles/mon/templates/volumes/grafana/config.ini
@@ -0,0 +1,28 @@
+[date_formats]
+full_date = YYYY-MM-DD @ HH:mm:ss
+interval_second = HH:mm:ss
+interval_minute = HH:mm
+interval_hour = DD.MM. HH:mm
+interval_day = DD.MM.
+interval_month = MM-YYYY
+interval_year = YYYY
+
+[server]
+root_url = https://{{ grafana_domain }}
+
+[auth.generic_oauth]
+enabled = true
+name = liz.coffee <3
+icon = signin
+client_id = grafana
+client_secret = {{ grafana_secret }}
+use_pkce = true
+auth_url = "https://{{ idm_domain }}/ui/oauth2"
+token_url = "https://{{ idm_domain }}/oauth2/token"
+api_url = "https://{{ idm_domain }}/oauth2/openid/grafana/userinfo"
+scopes = openid profile email groups
+empty_scopes = false
+login_attribute_path = preferred_username
+name_attribute_path = name
+groups_attribute_path = groups
+role_attribute_path = contains(groups, 'grafana_admins@idm.liz.coffee') && 'Admin' || 'Viewer'
diff --git a/playbooks/roles/mon/templates/volumes/grafana/data/.gitkeep b/playbooks/roles/mon/templates/volumes/grafana/data/.gitkeep
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/playbooks/roles/mon/templates/volumes/grafana/data/.gitkeep
diff --git a/playbooks/roles/mon/templates/volumes/prometheus/config.yml b/playbooks/roles/mon/templates/volumes/prometheus/config.yml
index be59f7f..7476367 100644
--- a/playbooks/roles/mon/templates/volumes/prometheus/config.yml
+++ b/playbooks/roles/mon/templates/volumes/prometheus/config.yml
@@ -1,5 +1,5 @@
global:
- scrape_interval: 20s
+ scrape_interval: 30s
scrape_configs:
- job_name: prometheus
@@ -22,6 +22,13 @@ scrape_configs:
- targets:
- traefik_traefik:5577
+ - job_name: proxmox  # scrapes port 9001 on three hosts; presumably a Proxmox metrics exporter (confirm)
+ static_configs:
+ - targets:
+ - piplup.liz.coffee:9001
+ - togepi.liz.coffee:9001
+ - roton.liz.coffee:9001
+
- job_name: headscale
static_configs:
- targets: