diff options
Diffstat (limited to 'playbooks/roles/mon')
5 files changed, 95 insertions, 16 deletions
diff --git a/playbooks/roles/mon/templates/stacks/docker-compose.yml b/playbooks/roles/mon/templates/stacks/docker-compose.yml index 31a5932..31fe0c1 100644 --- a/playbooks/roles/mon/templates/stacks/docker-compose.yml +++ b/playbooks/roles/mon/templates/stacks/docker-compose.yml @@ -47,11 +47,40 @@ services: labels: - traefik.enable=false + grafana: + image: grafana/grafana:latest + environment: + - TZ={{ timezone }} + - DEPLOYMENT_TIME={{ deployment_time }} + volumes: + - "{{ mon_base }}/volumes/grafana/data:/var/lib/grafana" + - "{{ mon_base }}/volumes/grafana/config.ini:/etc/grafana/grafana.ini:ro" + networks: + - monint + - proxy + - metrics + deploy: + mode: replicated + replicas: 1 + update_config: + parallelism: 1 + order: start-first + failure_action: rollback + labels: + - traefik.enable=true + - traefik.swarm.network=proxy + - traefik.http.routers.grafana.tls=true + - traefik.http.routers.grafana.tls.certResolver=letsencrypt + - traefik.http.routers.grafana.rule=Host(`{{ grafana_domain }}`) + - traefik.http.routers.grafana.entrypoints=websecure + - traefik.http.services.grafana.loadbalancer.server.port=3000 + prometheus: image: prom/prometheus:latest volumes: - "{{ mon_base }}/volumes/prometheus/config.yml:/etc/prometheus/prometheus.yml" networks: + - monint - proxy - metrics environment: diff --git a/playbooks/roles/mon/templates/volumes/gatus/config/config.yml b/playbooks/roles/mon/templates/volumes/gatus/config/config.yml index 403df4a..e5fcf73 100644 --- a/playbooks/roles/mon/templates/volumes/gatus/config/config.yml +++ b/playbooks/roles/mon/templates/volumes/gatus/config/config.yml @@ -19,13 +19,13 @@ endpoints: - "[BODY] == pat(*OK*)" alerts: - type: ntfy - failure-threshold: 1 + failure-threshold: 3 send-on-resolved: true description: "GlobalHealthCheck" - name: "LDAPS" url: "tls://{{ idm_domain }}:3636" - interval: 5m + interval: 2m client: timeout: 5s conditions: @@ -33,10 +33,24 @@ endpoints: - "[CERTIFICATE_EXPIRATION] > 48h" alerts: - type: ntfy - failure-threshold: 1 + failure-threshold: 3 send-on-resolved: true description: "LDAPS" + - name: "ssh for git" + url: "tls://src.{{ domain }}:2222" + interval: 2m + client: + timeout: 5s + conditions: + - "[CONNECTED] == true" + - "[CERTIFICATE_EXPIRATION] > 48h" + alerts: + - type: ntfy + failure-threshold: 3 + send-on-resolved: true + description: "ssh for git" + {% for test in email_tests %} {% set from_account = (email_accounts | selectattr("email", "equalto", test.from) | list).0 %} {% set to_account = (email_accounts | selectattr("email", "equalto", test.to) | list).0 %} @@ -73,7 +87,7 @@ endpoints: - "[BODY] == pat(*ok*)" alerts: - type: ntfy - failure-threshold: 1 + failure-threshold: 3 send-on-resolved: true description: "mail {{ test.name }}" {% endfor %} @@ -82,7 +96,7 @@ endpoints: - name: "mail on port {{ port }}" group: "mail" url: "tls://{{ mail_domain }}:{{ port }}" - interval: 5m + interval: 2m client: timeout: 5s conditions: @@ -90,7 +104,7 @@ endpoints: - "[CERTIFICATE_EXPIRATION] > 48h" alerts: - type: ntfy - failure-threshold: 1 + failure-threshold: 3 send-on-resolved: true description: "mail on port {{ port }}" {% endfor %} @@ -100,13 +114,13 @@ endpoints: - name: "healthcheck {{ user }} pub {{ healthcheck }} 200" group: "{{ user }}_pub" url: "{{ healthcheck }}" - interval: 1m + interval: 2m conditions: - "[STATUS] == 200" - "[CERTIFICATE_EXPIRATION] > 240h" alerts: - type: ntfy - failure-threshold: 1 + failure-threshold: 3 send-on-resolved: true description: "healthcheck {{ user }} pub {{ healthcheck }} 200" {% endfor %} @@ -116,13 +130,13 @@ endpoints: client: dns-resolver: "tcp://{{ m.gateway }}:53" group: "{{ user }}_priv" - interval: 1m + interval: 2m conditions: - "[STATUS] == 200" - "[CERTIFICATE_EXPIRATION] > 240h" alerts: - type: ntfy - failure-threshold: 1 + failure-threshold: 3 send-on-resolved: true description: "{{ healthcheck }} priv healthcheck {{ user }}" @@ -131,12 +145,12 @@ endpoints: url: "{{ healthcheck }}" client: dns-resolver: "tcp://{{ public_resolver }}:53" - interval: 1m + interval: 2m conditions: - "[STATUS] == 403" alerts: - type: ntfy - failure-threshold: 1 + failure-threshold: 3 send-on-resolved: true description: "{{ healthcheck }} pub healthcheck {{ user }} 403" {% endfor %} @@ -144,7 +158,7 @@ endpoints: - name: "DNS Check [{{ record.name }}_{{ record.type }}]" group: "{{ user }}_dns_private" url: "{{ m.gateway }}" - interval: 5m + interval: 2m dns: query-name: "{{ record.name }}" query-type: "{{ record.type }}" @@ -153,18 +167,19 @@ endpoints: - "[DNS_RCODE] == NOERROR" alerts: - type: ntfy - failure-threshold: 1 + failure-threshold: 3 send-on-resolved: true description: "DNS {{ record.name }}_{{ record.type }}" - name: "PING {{ record.name }}_{{ record.type }}" group: "{{ user }}_dns_private" url: "icmp://{{ record.name }}" + interval: 2m conditions: - "[CONNECTED] == true" alerts: - type: ntfy - failure-threshold: 1 + failure-threshold: 3 send-on-resolved: true description: "PING {{ record.name }}" {% endfor %} diff --git a/playbooks/roles/mon/templates/volumes/grafana/config.ini b/playbooks/roles/mon/templates/volumes/grafana/config.ini new file mode 100644 index 0000000..e0371ea --- /dev/null +++ b/playbooks/roles/mon/templates/volumes/grafana/config.ini @@ -0,0 +1,28 @@ +[date_formats] +full_date = YYYY-MM-DD @ HH:mm:ss +interval_second = HH:mm:ss +interval_minute = HH:mm +interval_hour = DD.MM. HH:mm +interval_day = DD.MM. +interval_month = MM-YYYY +interval_year = YYYY + +[server] +root_url = https://{{ grafana_domain }} + +[auth.generic_oauth] +enabled = true +name = liz.coffee <3 +icon = signin +client_id = grafana +client_secret = {{ grafana_secret }} +scopes = openid profile email groups +empty_scopes = false +auth_url="https://{{ idm_domain }}/ui/oauth2" +token_url="https://{{ idm_domain }}/oauth2/token" +api_url="https://{{ idm_domain }}/oauth2/openid/grafana/userinfo" +login_attribute_path = preferred_username +groups_attribute_path = groups +name_attribute_path = name +role_attribute_path = contains(groups, 'grafana_admins@idm.liz.coffee') && 'Admin' || 'Viewer' +use_pkce = true diff --git a/playbooks/roles/mon/templates/volumes/grafana/data/.gitkeep b/playbooks/roles/mon/templates/volumes/grafana/data/.gitkeep new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/playbooks/roles/mon/templates/volumes/grafana/data/.gitkeep diff --git a/playbooks/roles/mon/templates/volumes/prometheus/config.yml b/playbooks/roles/mon/templates/volumes/prometheus/config.yml index be59f7f..7476367 100644 --- a/playbooks/roles/mon/templates/volumes/prometheus/config.yml +++ b/playbooks/roles/mon/templates/volumes/prometheus/config.yml @@ -1,5 +1,5 @@ global: - scrape_interval: 20s + scrape_interval: 30s scrape_configs: - job_name: prometheus @@ -22,6 +22,13 @@ scrape_configs: - targets: - traefik_traefik:5577 + - job_name: proxmox + static_configs: + - targets: + - piplup.liz.coffee:9001 + - togepi.liz.coffee:9001 + - roton.liz.coffee:9001 + - job_name: headscale static_configs: - targets: |