Prometheus Time Series Collection and Processing Server

Rules

linux			46.797s ago	12.89ms
Rule	State	Error	Last Evaluation	Evaluation Time
alert: PrometheusConfigurationReload expr: prometheus_config_last_reload_successful != 1 for: 5m labels: severity: critical annotations: description: \|- Prometheus configuration reload error VALUE = {{ $value }} LABELS: {{ $labels }} summary: Prometheus configuration reload (instance {{ $labels.instance }})	ok		46.798s ago	160.3us
alert: PrometheusNotConnectedToAlertmanager expr: prometheus_notifications_alertmanagers_discovered < 1 for: 5m labels: severity: critical annotations: description: \|- Prometheus cannot connect to the alertmanager VALUE = {{ $value }} LABELS: {{ $labels }} summary: Prometheus not connected to alertmanager (instance {{ $labels.instance }})	ok		46.798s ago	44.19us
alert: ExporterDown expr: up == 0 for: 5m labels: severity: critical annotations: description: \|- Prometheus exporter down VALUE = {{ $value }} LABELS: {{ $labels }} summary: Exporter down (instance {{ $labels.instance }})	ok		46.798s ago	10.08ms
alert: OutOfMemory expr: (100 - (((node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes - node_memory_Shmem_bytes) / node_memory_MemTotal_bytes) * 100)) < 5 for: 5m labels: severity: critical annotations: description: \|- Node memory is filling up (< 5% left) VALUE = {{ $value }} LABELS: {{ $labels }} summary: Out of memory (instance {{ $labels.instance }})	ok		46.788s ago	253.2us
alert: UnusualNetworkThroughputIn expr: sum by(instance) (irate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100 for: 5m labels: severity: warning annotations: description: \|- Host network interfaces are probably receiving too much data (> 100 MB/s) VALUE = {{ $value }} LABELS: {{ $labels }} summary: Unusual network throughput in (instance {{ $labels.instance }})	ok		46.788s ago	135.8us
alert: UnusualNetworkThroughputOut expr: sum by(instance) (irate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100 for: 5m labels: severity: warning annotations: description: \|- Host network interfaces are probably sending too much data (> 100 MB/s) VALUE = {{ $value }} LABELS: {{ $labels }} summary: Unusual network throughput out (instance {{ $labels.instance }})	ok		46.787s ago	112.8us
alert: UnusualDiskReadRate expr: sum by(instance) (irate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50 for: 5m labels: severity: warning annotations: description: \|- Disk is probably reading too much data (> 50 MB/s) VALUE = {{ $value }} LABELS: {{ $labels }} summary: Unusual disk read rate (instance {{ $labels.instance }})	ok		46.787s ago	112.2us
alert: UnusualDiskWriteRate expr: sum by(instance) (irate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50 for: 5m labels: severity: warning annotations: description: \|- Disk is probably writing too much data (> 50 MB/s) VALUE = {{ $value }} LABELS: {{ $labels }} summary: Unusual disk write rate (instance {{ $labels.instance }})	ok		46.787s ago	126.1us
alert: OutOfDiskSpace expr: (node_filesystem_avail_bytes{mountpoint="/"} * 100) / node_filesystem_size_bytes{mountpoint="/"} < 10 for: 5m labels: severity: critical annotations: description: \|- Disk is almost full (< 10% left) VALUE = {{ $value }} LABELS: {{ $labels }} summary: Out of disk space (instance {{ $labels.instance }})	ok		46.787s ago	103.5us
alert: OutOfInodes expr: node_filesystem_files_free{mountpoint="/"} / node_filesystem_files{mountpoint="/"} * 100 < 10 for: 5m labels: severity: critical annotations: description: \|- Disk is almost running out of available inodes (< 10% left) VALUE = {{ $value }} LABELS: {{ $labels }} summary: Out of inodes (instance {{ $labels.instance }})	ok		46.787s ago	146.8us
alert: UnusualDiskReadLatency expr: rate(node_disk_read_time_seconds_total[1m]) / rate(node_disk_reads_completed_total[1m]) > 100 for: 5m labels: severity: warning annotations: description: \|- Disk latency is growing (read operations > 100ms) VALUE = {{ $value }} LABELS: {{ $labels }} summary: Unusual disk read latency (instance {{ $labels.instance }})	ok		46.787s ago	146.2us
alert: UnusualDiskWriteLatency expr: rate(node_disk_write_time_seconds_total[1m]) / rate(node_disk_writes_completed_total[1m]) > 100 for: 5m labels: severity: warning annotations: description: \|- Disk latency is growing (write operations > 100ms) VALUE = {{ $value }} LABELS: {{ $labels }} summary: Unusual disk write latency (instance {{ $labels.instance }})	ok		46.787s ago	167.9us
alert: HighCpuLoad expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 90 for: 5m labels: severity: critical annotations: description: \|- CPU load is > 90% VALUE = {{ $value }} LABELS: {{ $labels }} summary: High CPU load (instance {{ $labels.instance }})	ok		46.787s ago	112.7us
alert: SwapIsFillingUp expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 90 for: 5m labels: severity: critical annotations: description: \|- Swap is filling up (>90%) VALUE = {{ $value }} LABELS: {{ $labels }} summary: Swap is filling up (instance {{ $labels.instance }})	ok		46.787s ago	94.93us
alert: PhysicalComponentTooHot expr: node_hwmon_temp_celsius > 75 for: 5m labels: severity: warning annotations: description: \|- Physical hardware component too hot VALUE = {{ $value }} LABELS: {{ $labels }} summary: Physical component too hot (instance {{ $labels.instance }})	ok		46.787s ago	420.6us
alert: NodeOvertemperatureAlarm expr: node_hwmon_temp_alarm == 1 for: 5m labels: severity: critical annotations: description: \|- Physical node temperature alarm triggered VALUE = {{ $value }} LABELS: {{ $labels }} summary: Node overtemperature alarm (instance {{ $labels.instance }})	ok		46.787s ago	31.38us
alert: SslCertificateWillExpireSoon expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 7 for: 5m labels: severity: warning annotations: description: \|- SSL certificate expires in 7 days VALUE = {{ $value }} LABELS: {{ $labels }} summary: SSL certificate will expire soon (instance {{ $labels.instance }})	ok		46.787s ago	53.69us
alert: SslCertificateExpired expr: probe_ssl_earliest_cert_expiry - time() <= 0 for: 5m labels: severity: critical annotations: description: \|- SSL certificate has expired already VALUE = {{ $value }} LABELS: {{ $labels }} summary: SSL certificate expired (instance {{ $labels.instance }})	ok		46.787s ago	46.55us
alert: SlowProbe expr: avg_over_time(probe_duration_seconds[1m]) > 2 for: 5m labels: severity: warning annotations: description: \|- Blackbox probe took more than 2s to complete VALUE = {{ $value }} LABELS: {{ $labels }} summary: Slow probe (instance {{ $labels.instance }})	ok		46.787s ago	65.52us
alert: HttpStatusCode expr: probe_http_status_code <= 199 or probe_http_status_code >= 400 for: 5m labels: severity: critical annotations: description: \|- HTTP status code is not 200-399 VALUE = {{ $value }} LABELS: {{ $labels }} summary: HTTP Status Code (instance {{ $labels.instance }})	ok		46.787s ago	252.8us
alert: HttpSlowRequests expr: avg_over_time(probe_http_duration_seconds[1m]) > 1 for: 5m labels: severity: warning annotations: description: \|- HTTP request took more than 1s VALUE = {{ $value }} LABELS: {{ $labels }} summary: HTTP slow requests (instance {{ $labels.instance }})	ok		46.787s ago	162.9us
alert: SlowPing expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1 for: 5m labels: severity: warning annotations: description: \|- Blackbox ping took more than 1s VALUE = {{ $value }} LABELS: {{ $labels }} summary: Slow ping (instance {{ $labels.instance }})	ok		46.787s ago	32.57us
nginx			17.914s ago	143.9us
Rule	State	Error	Last Evaluation	Evaluation Time
alert: NginxDown expr: nginx_up{instance="app.rasseed.com",job="nginx",service="nginx"} == 0 for: 5m labels: severity: error annotations: description: Nginx on {{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes. summary: Nginx on {{ $labels.instance }} down	ok		17.914s ago	139.5us
windows			39.964s ago	412.3us
Rule	State	Error	Last Evaluation	Evaluation Time
alert: WindowsServerCollectorError expr: windows_exporter_collector_success == 0 for: 5m labels: severity: critical annotations: description: \|- Collector {{ $labels.collector }} was not successful VALUE = {{ $value }} LABELS: {{ $labels }} summary: Windows Server collector Error (instance {{ $labels.instance }})	ok		39.964s ago	105us
alert: WindowsServerServiceStatus expr: windows_service_status{status="ok"} != 1 for: 5m labels: severity: critical annotations: description: \|- Windows Service state is not OK VALUE = {{ $value }} LABELS: {{ $labels }} summary: Windows Server service Status (instance {{ $labels.instance }})	ok		39.964s ago	42.9us
alert: WindowsServerCpuUsage expr: 100 - (avg by(instance) (rate(windows_cpu_time_total{mode="idle"}[2m])) * 100) > 90 for: 5m labels: severity: critical annotations: description: \|- CPU Usage is more than 90% VALUE = {{ $value }} LABELS: {{ $labels }} summary: Windows Server CPU Usage (instance {{ $labels.instance }})	ok		39.964s ago	106us
alert: WindowsServerMemoryUsage expr: 100 - ((windows_os_physical_memory_free_bytes / windows_cs_physical_memory_bytes) * 100) > 90 for: 5m labels: severity: critical annotations: description: \|- Memory usage is more than 90% VALUE = {{ $value }} LABELS: {{ $labels }} summary: Windows Server memory Usage (instance {{ $labels.instance }})	ok		39.964s ago	57.5us
alert: WindowsServerDiskSpaceUsage expr: 100 - 100 * ((windows_logical_disk_free_bytes / 1024 / 1024) / (windows_logical_disk_size_bytes / 1024 / 1024)) > 90 for: 5m labels: severity: critical annotations: description: \|- Disk usage is more than 90% VALUE = {{ $value }} LABELS: {{ $labels }} summary: Windows Server disk Space Usage (instance {{ $labels.instance }})	ok		39.964s ago	91.74us

Rules

linux

46.797s ago

12.89ms

nginx

17.914s ago

143.9us

windows

39.964s ago

412.3us