Alerts

/prometheus/prometheus/linux-infra.rule > linux
ExporterDown (109 active)
# ExporterDown: raised for every series whose `up` value has been 0
# continuously for 5 minutes.
alert: ExporterDown
expr: 'up == 0'
for: 5m
labels:
  severity: critical
annotations:
  summary: 'Exporter down (instance {{ $labels.instance }})'
  # Literal block scalar: the two detail lines keep their extra indent.
  description: |-
    Prometheus exporter down
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
Labels State Active Since Value
alertname="ExporterDown" instance="51.158.20.96:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.44.14:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.159.92.19:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.159.93.196:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.159.88.201:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.4.15.70:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.159.93.60:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="93.189.103.175" job="windows_exporter" service="windows_exporter" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.159.95.29:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="innodev.exacall.com:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.159.88.219:9100" job="node" service="node" severity="critical" firing 2024-09-05 08:12:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.12.232:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.5.91:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="deliciousfood.exacall.com:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="homoola.exacall.com:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.25.195:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.7.244:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.9.147:9100" job="node" service="node" severity="critical" firing 2024-10-06 01:33:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.47.144:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.6.178:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.6.68:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.9.129:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.5.46:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.15.62:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="sumtravel.exacall.com:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="212.83.184.161:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.31.188:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.11.128:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="app.rasseed.com:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="javacafe.exacall.com:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.25.146:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.47.146:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.159.90.210:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.27.86:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="163.172.125.199:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="212.83.136.236:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.15.151.140:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.47.142:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="cardial.exacall.com:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.47.135:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.15.39:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.30.8:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.9.112:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="taqa.gov.sa:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="163.172.125.96:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.44.13:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="212.83.131.121:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.9.33:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="212.83.144.222:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="212.83.187.200:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.5.139:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.70.147:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="195.154.58.143:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.47.134:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.47.145:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="163.172.125.121:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="212.83.138.96:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.70.118:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.70.59:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.47.132:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.47.136:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.159.92.18:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.15.63:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="exacall9.exacall.com:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="195.154.47.23:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="212.83.144.174:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="52.87.140.12" job="windows_exporter" service="windows_exporter" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="163.172.125.94:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.25.157:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.159.93.195:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.7.136:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.9.231:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="93.189.103.168" job="windows_exporter" service="windows_exporter" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="glowork.exacall.com:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="163.172.125.164:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.25.145:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.7.243:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.15.145.101:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.9.237:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="212.83.162.146:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.15.144.72:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.15.158.6:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.25.209:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.9.116:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.25.190:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.25.198:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.25.251:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.5.141:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.6.63:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.70.73:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="195.154.58.47:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="212.83.144.171:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="93.189.103.176" job="windows_exporter" service="windows_exporter" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="212.83.162.65:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.26.234:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.28.88:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.10.224:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.25.95:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="195.154.59.67:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.25.192:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.44.15:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.159.93.197:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.26.147:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.5.127:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.210.70.239:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="62.4.15.221:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.25.144:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.158.20.147:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="ExporterDown" instance="51.159.95.30:9100" job="node" service="node" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
HttpStatusCode (2 active)
# HttpStatusCode: raised when a probed HTTP status code has been outside
# the 200-399 range (<= 199 or >= 400) for 5 minutes.
alert: HttpStatusCode
expr: 'probe_http_status_code <= 199 or probe_http_status_code >= 400'
for: 5m
labels:
  severity: critical
annotations:
  summary: 'HTTP Status Code (instance {{ $labels.instance }})'
  description: |-
    HTTP status code is not 200-399
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
Labels State Active Since Value
alertname="HttpStatusCode" instance="https://app.rasseed.com/" job="blackbox" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 0
alertname="HttpStatusCode" instance="https://s1.essal.co" job="blackbox" severity="critical" firing 2024-08-14 14:28:13.569457728 +0000 UTC 502
PhysicalComponentTooHot (4 active)
# PhysicalComponentTooHot: raised per sensor series when
# node_hwmon_temp_celsius stays above 75 for 5 minutes.
alert: PhysicalComponentTooHot
expr: 'node_hwmon_temp_celsius > 75'
for: 5m
labels:
  severity: warning
annotations:
  summary: 'Physical component too hot (instance {{ $labels.instance }})'
  description: |-
    Physical hardware component too hot
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
Labels State Active Since Value
alertname="PhysicalComponentTooHot" chip="platform_coretemp_0" instance="localhost:9100" job="node" sensor="temp2" service="node" severity="warning" firing 2024-08-14 14:28:13.569457728 +0000 UTC 100
alertname="PhysicalComponentTooHot" chip="platform_coretemp_0" instance=":9100" job="node" sensor="temp1" service="node" severity="warning" firing 2024-08-14 14:28:13.569457728 +0000 UTC 100
alertname="PhysicalComponentTooHot" chip="platform_coretemp_0" instance=":9100" job="node" sensor="temp2" service="node" severity="warning" firing 2024-08-14 14:28:13.569457728 +0000 UTC 100
alertname="PhysicalComponentTooHot" chip="platform_coretemp_0" instance="localhost:9100" job="node" sensor="temp1" service="node" severity="warning" firing 2024-08-14 14:28:13.569457728 +0000 UTC 100
HighCpuLoad (0 active)
# HighCpuLoad: raised when the per-instance non-idle CPU percentage stays
# above 90 for 5 minutes.
#
# The idle share is averaged over all CPU series of an instance and
# subtracted from 100 to obtain the busy percentage.
alert: HighCpuLoad
# rate() is used instead of irate(): irate() only considers the last two
# samples in the window, so a single short dip can reset the `for` timer
# and keep a genuinely overloaded host from ever alerting.
expr: >-
  100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
  > 90
for: 5m
labels:
  severity: critical
annotations:
  summary: 'High CPU load (instance {{ $labels.instance }})'
  description: |-
    CPU load is > 90%
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
HttpSlowRequests (0 active)
# HttpSlowRequests: raised when the 1-minute average of
# probe_http_duration_seconds stays above 1 for 5 minutes.
alert: HttpSlowRequests
expr: 'avg_over_time(probe_http_duration_seconds[1m]) > 1'
for: 5m
labels:
  severity: warning
annotations:
  summary: 'HTTP slow requests (instance {{ $labels.instance }})'
  description: |-
    HTTP request took more than 1s
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
NodeOvertemperatureAlarm (0 active)
# NodeOvertemperatureAlarm: raised when node_hwmon_temp_alarm has been
# equal to 1 for 5 minutes.
alert: NodeOvertemperatureAlarm
expr: 'node_hwmon_temp_alarm == 1'
for: 5m
labels:
  severity: critical
annotations:
  summary: 'Node overtemperature alarm (instance {{ $labels.instance }})'
  description: |-
    Physical node temperature alarm triggered
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
OutOfDiskSpace (0 active)
# OutOfDiskSpace: raised when the available share of the "/" filesystem
# (avail bytes * 100 / size bytes) stays below 10 for 5 minutes.
alert: OutOfDiskSpace
expr: >-
  (node_filesystem_avail_bytes{mountpoint="/"} * 100)
  / node_filesystem_size_bytes{mountpoint="/"}
  < 10
for: 5m
labels:
  severity: critical
annotations:
  summary: 'Out of disk space (instance {{ $labels.instance }})'
  description: |-
    Disk is almost full (< 10% left)
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
OutOfInodes (0 active)
# OutOfInodes: raised when the free-inode percentage of the "/" filesystem
# (files_free / files * 100) stays below 10 for 5 minutes.
alert: OutOfInodes
expr: >-
  node_filesystem_files_free{mountpoint="/"}
  / node_filesystem_files{mountpoint="/"}
  * 100 < 10
for: 5m
labels:
  severity: critical
annotations:
  summary: 'Out of inodes (instance {{ $labels.instance }})'
  description: |-
    Disk is almost running out of available inodes (< 10% left)
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
OutOfMemory (0 active)
# OutOfMemory: raised when the remaining memory percentage stays below 5
# for 5 minutes.
#
# "Used" is computed as MemTotal minus MemFree, Buffers, Cached and Shmem;
# the used percentage is then subtracted from 100.
alert: OutOfMemory
expr: >-
  (100 - (((node_memory_MemTotal_bytes
  - node_memory_MemFree_bytes
  - node_memory_Buffers_bytes
  - node_memory_Cached_bytes
  - node_memory_Shmem_bytes)
  / node_memory_MemTotal_bytes) * 100)) < 5
for: 5m
labels:
  severity: critical
annotations:
  summary: 'Out of memory (instance {{ $labels.instance }})'
  description: |-
    Node memory is filling up (< 5% left)
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
PrometheusConfigurationReload (0 active)
# PrometheusConfigurationReload: raised when
# prometheus_config_last_reload_successful has differed from 1 for
# 5 minutes.
alert: PrometheusConfigurationReload
expr: 'prometheus_config_last_reload_successful != 1'
for: 5m
labels:
  severity: critical
annotations:
  summary: 'Prometheus configuration reload (instance {{ $labels.instance }})'
  description: |-
    Prometheus configuration reload error
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
PrometheusNotConnectedToAlertmanager (0 active)
# PrometheusNotConnectedToAlertmanager: raised when
# prometheus_notifications_alertmanagers_discovered stays below 1 for
# 5 minutes.
alert: PrometheusNotConnectedToAlertmanager
expr: 'prometheus_notifications_alertmanagers_discovered < 1'
for: 5m
labels:
  severity: critical
annotations:
  # Kept on one line so the template braces are not split across a fold.
  summary: 'Prometheus not connected to alertmanager (instance {{ $labels.instance }})'
  description: |-
    Prometheus cannot connect the alertmanager
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
SlowPing (0 active)
# SlowPing: raised when the 1-minute average of
# probe_icmp_duration_seconds stays above 1 for 5 minutes.
alert: SlowPing
expr: 'avg_over_time(probe_icmp_duration_seconds[1m]) > 1'
for: 5m
labels:
  severity: warning
annotations:
  summary: 'Slow ping (instance {{ $labels.instance }})'
  description: |-
    Blackbox ping took more than 1s
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
SlowProbe (0 active)
# SlowProbe: raised when the 1-minute average of probe_duration_seconds
# stays above 2 for 5 minutes.
alert: SlowProbe
expr: 'avg_over_time(probe_duration_seconds[1m]) > 2'
for: 5m
labels:
  severity: warning
annotations:
  summary: 'Slow probe (instance {{ $labels.instance }})'
  # The text previously said "1s" while the expression compares against 2;
  # the message now states the threshold that actually triggers the alert.
  description: |-
    Blackbox probe took more than 2s to complete
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
SslCertificateExpired (0 active)
# SslCertificateExpired: raised when probe_ssl_earliest_cert_expiry minus
# the current time() has been zero or negative for 5 minutes.
alert: SslCertificateExpired
expr: 'probe_ssl_earliest_cert_expiry - time() <= 0'
for: 5m
labels:
  severity: critical
annotations:
  summary: 'SSL certificate expired (instance {{ $labels.instance }})'
  description: |-
    SSL certificate has expired already
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
SslCertificateWillExpireSoon (0 active)
# SslCertificateWillExpireSoon: raised when probe_ssl_earliest_cert_expiry
# minus time() stays below 86400 * 7 (seven days in seconds) for 5 minutes.
# The comparison has no lower bound, so non-positive differences match too.
alert: SslCertificateWillExpireSoon
expr: 'probe_ssl_earliest_cert_expiry - time() < 86400 * 7'
for: 5m
labels:
  severity: warning
annotations:
  summary: 'SSL certificate will expire soon (instance {{ $labels.instance }})'
  description: |-
    SSL certificate expires in 7 days
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
SwapIsFillingUp (0 active)
# SwapIsFillingUp: raised when the used-swap percentage,
# (1 - SwapFree / SwapTotal) * 100, stays above 90 for 5 minutes.
alert: SwapIsFillingUp
expr: >-
  (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes))
  * 100 > 90
for: 5m
labels:
  severity: critical
annotations:
  summary: 'Swap is filling up (instance {{ $labels.instance }})'
  description: |-
    Swap is filling up (>90%)
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
UnusualDiskReadLatency (0 active)
# UnusualDiskReadLatency: raised when the average time per completed read
# (read-time rate divided by completed-reads rate, both over 1 minute)
# stays above 100 ms for 5 minutes.
alert: UnusualDiskReadLatency
# node_disk_read_time_seconds_total is expressed in seconds, so the
# 100 ms limit named in the description is 0.1. The former threshold of
# 100 meant 100 seconds per read and could not fire in practice.
expr: >-
  rate(node_disk_read_time_seconds_total[1m])
  / rate(node_disk_reads_completed_total[1m])
  > 0.1
for: 5m
labels:
  severity: warning
annotations:
  summary: 'Unusual disk read latency (instance {{ $labels.instance }})'
  description: |-
    Disk latency is growing (read operations > 100ms)
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
UnusualDiskReadRate (0 active)
# UnusualDiskReadRate: raised when the per-instance sum of disk read
# throughput, converted from bytes/s to MiB/s by dividing by 1024 twice,
# stays above 50 for 5 minutes.
alert: UnusualDiskReadRate
# rate() replaces irate(): irate() uses only the last two samples of the
# window, so a momentary dip can reset the `for` timer during sustained
# heavy reads.
expr: >-
  sum by(instance) (rate(node_disk_read_bytes_total[2m]))
  / 1024 / 1024 > 50
for: 5m
labels:
  severity: warning
annotations:
  summary: 'Unusual disk read rate (instance {{ $labels.instance }})'
  description: |-
    Disk is probably reading too much data (> 50 MB/s)
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
UnusualDiskWriteLatency (0 active)
# UnusualDiskWriteLatency: raised when the average time per completed write
# (write-time rate divided by completed-writes rate, both over 1 minute)
# stays above 100 ms for 5 minutes.
alert: UnusualDiskWriteLatency
# node_disk_write_time_seconds_total is expressed in seconds, so the
# 100 ms limit named in the description is 0.1. The former threshold of
# 100 meant 100 seconds per write and could not fire in practice.
expr: >-
  rate(node_disk_write_time_seconds_total[1m])
  / rate(node_disk_writes_completed_total[1m])
  > 0.1
for: 5m
labels:
  severity: warning
annotations:
  summary: 'Unusual disk write latency (instance {{ $labels.instance }})'
  description: |-
    Disk latency is growing (write operations > 100ms)
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
UnusualDiskWriteRate (0 active)
# UnusualDiskWriteRate: raised when the per-instance sum of disk write
# throughput, converted from bytes/s to MiB/s by dividing by 1024 twice,
# stays above 50 for 5 minutes.
alert: UnusualDiskWriteRate
# rate() replaces irate(): irate() uses only the last two samples of the
# window, so a momentary dip can reset the `for` timer during sustained
# heavy writes.
expr: >-
  sum by(instance) (rate(node_disk_written_bytes_total[2m]))
  / 1024 / 1024 > 50
for: 5m
labels:
  severity: warning
annotations:
  summary: 'Unusual disk write rate (instance {{ $labels.instance }})'
  description: |-
    Disk is probably writing too much data (> 50 MB/s)
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
UnusualNetworkThroughputIn (0 active)
# UnusualNetworkThroughputIn: raised when the per-instance sum of received
# network throughput, converted from bytes/s to MiB/s by dividing by 1024
# twice, stays above 100 for 5 minutes.
alert: UnusualNetworkThroughputIn
# rate() replaces irate(): irate() uses only the last two samples of the
# window, so a momentary dip can reset the `for` timer during sustained
# heavy traffic.
expr: >-
  sum by(instance) (rate(node_network_receive_bytes_total[2m]))
  / 1024 / 1024 > 100
for: 5m
labels:
  severity: warning
annotations:
  summary: 'Unusual network throughput in (instance {{ $labels.instance }})'
  description: |-
    Host network interfaces are probably receiving too much data (> 100 MB/s)
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
UnusualNetworkThroughputOut (0 active)
# UnusualNetworkThroughputOut: raised when the per-instance sum of
# transmitted network throughput, converted from bytes/s to MiB/s by
# dividing by 1024 twice, stays above 100 for 5 minutes.
alert: UnusualNetworkThroughputOut
# rate() replaces irate(): irate() uses only the last two samples of the
# window, so a momentary dip can reset the `for` timer during sustained
# heavy traffic.
expr: >-
  sum by(instance) (rate(node_network_transmit_bytes_total[2m]))
  / 1024 / 1024 > 100
for: 5m
labels:
  severity: warning
annotations:
  summary: 'Unusual network throughput out (instance {{ $labels.instance }})'
  description: |-
    Host network interfaces are probably sending too much data (> 100 MB/s)
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
/prometheus/prometheus/nginx.rule > nginx
NignxDown (0 active)
# NignxDown: raised when nginx_up for the single hard-coded target
# (instance "app.rasseed.com", job "nginx", service "nginx") has been 0
# for 5 minutes.
#
# NOTE(review): the alert name is spelled "Nignx". It is left untouched
# because the name becomes the `alertname` label; renaming it would affect
# anything that matches on that label.
alert: NignxDown
expr: 'nginx_up{instance="app.rasseed.com",job="nginx",service="nginx"} == 0'
for: 5m
labels:
  # NOTE(review): the other rules in this listing use "critical" or
  # "warning"; confirm that "error" is a severity the alert routing handles.
  severity: error
annotations:
  summary: 'Nginx on {{ $labels.instance }} down'
  description: >-
    Nginx on {{ $labels.instance }} of job {{ $labels.job }}
    has been down for more than 5 minutes.
/prometheus/prometheus/windows-infra.rule > windows
WindowsServerCollectorError (0 active)
# WindowsServerCollectorError: raised when
# windows_exporter_collector_success has been 0 for 5 minutes; the
# description names the failing collector via its `collector` label.
alert: WindowsServerCollectorError
expr: 'windows_exporter_collector_success == 0'
for: 5m
labels:
  severity: critical
annotations:
  summary: 'Windows Server collector Error (instance {{ $labels.instance }})'
  description: |-
    Collector {{ $labels.collector }} was not successful
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
WindowsServerCpuUsage (0 active)
# WindowsServerCpuUsage: raised when the per-instance non-idle CPU
# percentage (100 minus the averaged 2-minute idle rate * 100) stays above
# 90 for 5 minutes.
alert: WindowsServerCpuUsage
expr: >-
  100 - (avg by(instance) (rate(windows_cpu_time_total{mode="idle"}[2m])) * 100)
  > 90
for: 5m
labels:
  severity: critical
annotations:
  summary: 'Windows Server CPU Usage (instance {{ $labels.instance }})'
  description: |-
    CPU Usage is more than 90%
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
WindowsServerDiskSpaceUsage (0 active)
# WindowsServerDiskSpaceUsage: raised when the used percentage of a logical
# disk (100 minus 100 * free / size) stays above 90 for 5 minutes.
# Both operands are divided by 1024 twice before the ratio is taken.
alert: WindowsServerDiskSpaceUsage
expr: >-
  100 - 100 *
  ((windows_logical_disk_free_bytes / 1024 / 1024)
  / (windows_logical_disk_size_bytes / 1024 / 1024))
  > 90
for: 5m
labels:
  severity: critical
annotations:
  summary: 'Windows Server disk Space Usage (instance {{ $labels.instance }})'
  description: |-
    Disk usage is more than 90%
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
WindowsServerMemoryUsage (0 active)
# WindowsServerMemoryUsage: raised when the used physical-memory percentage
# (100 minus free / total * 100) stays above 90 for 5 minutes.
alert: WindowsServerMemoryUsage
expr: >-
  100 -
  ((windows_os_physical_memory_free_bytes / windows_cs_physical_memory_bytes) * 100)
  > 90
for: 5m
labels:
  severity: critical
annotations:
  summary: 'Windows Server memory Usage (instance {{ $labels.instance }})'
  description: |-
    Memory usage is more than 90%
      VALUE = {{ $value }}
      LABELS: {{ $labels }}
WindowsServerServiceStatus (0 active)
# WindowsServerServiceStatus: raised when a windows_service_status series
# labelled status="ok" has had a value other than 1 for 5 minutes.
alert: WindowsServerServiceStatus
expr: 'windows_service_status{status="ok"} != 1'
for: 5m
labels:
  severity: critical
annotations:
  summary: 'Windows Server service Status (instance {{ $labels.instance }})'
  description: |-
    Windows Service state is not OK
      VALUE = {{ $value }}
      LABELS: {{ $labels }}