/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > alertmanager.rules
alert: AlertmanagerConfigInconsistent
expr: count_values by(service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) / on(service) group_left() label_replace(max by(name, job, namespace, controller) (prometheus_operator_spec_replicas{controller="alertmanager",job="prometheus-operator",namespace="monitoring"}), "service", "alertmanager-$1", "name", "(.*)") != 1
for: 5m
labels:
  severity: critical
annotations:
  message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.
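This rulefile is generated: with kube-prometheus, the prometheus-operator assembles /etc/prometheus/rules/prometheus-k8s-rulefiles-0/ from PrometheusRule custom resources, so changes belong in a PrometheusRule object rather than in the file itself. A minimal sketch, assuming the kube-prometheus defaults (the metadata name is a placeholder, and the prometheus: k8s / role: alert-rules labels must match whatever ruleSelector your Prometheus object uses):

apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: example-alertmanager-rules   # placeholder name
  namespace: monitoring
  labels:
    prometheus: k8s                  # must match the Prometheus object's ruleSelector
    role: alert-rules
spec:
  groups:
  - name: alertmanager.rules
    rules:
    - alert: AlertmanagerConfigInconsistent
      expr: "<expression as shown above>"   # placeholder for the full expression
      for: 5m
      labels:
        severity: critical
      annotations:
        message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.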
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > general.rules
Labels:       alertname="TargetDown", job="kube-controller-manager", namespace="kube-system", service="kube-controller-manager-prometheus-discovery", severity="warning"
State:        firing
Active Since: 2025-08-19 10:10:15 +0000 UTC
Value:        100
Annotations:
  message: 100% of the kube-controller-manager/kube-controller-manager-prometheus-discovery targets in kube-system namespace are down.
alert: Watchdog
expr: vector(1)
labels:
  severity: none
annotations:
  message: |
    This is an alert meant to ensure that the entire alerting pipeline is functional.
    This alert is always firing, therefore it should always be firing in Alertmanager
    and always fire against a receiver. There are integrations with various notification
    mechanisms that send a notification when this alert is not firing. For example the
    "DeadMansSnitch" integration in PagerDuty.
Labels:       alertname="Watchdog", severity="none"
State:        firing
Active Since: 2025-11-05 12:28:15.022073385 +0000 UTC
Value:        1
Annotations:
  message: This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the "DeadMansSnitch" integration in PagerDuty.
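The Watchdog annotation describes a dead man's switch: the always-firing alert is routed to a receiver that an external service (for example PagerDuty's "DeadMansSnitch" integration) expects to hear from continuously, and that service pages when the notifications stop arriving. A minimal Alertmanager routing sketch, with a placeholder webhook URL:

route:
  receiver: default
  routes:
  - match:
      alertname: Watchdog
    receiver: watchdog
    repeat_interval: 5m                              # keep re-notifying so the external check stays green
receivers:
- name: default
- name: watchdog
  webhook_configs:
  - url: https://example.invalid/dead-mans-switch    # placeholder; the real endpoint comes from the snitch service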
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kube-apiserver-slos
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kube-state-metrics
Labels:       alertname="KubeStateMetricsListErrors", severity="critical"
State:        firing
Active Since: 2025-11-05 12:49:00.417432371 +0000 UTC
Value:        1
Annotations:
  message: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricslisterrors
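The rule definition itself is collapsed in this dump. For reference, the upstream kubernetes-mixin defines KubeStateMetricsListErrors roughly as below, firing when more than about 1% of kube-state-metrics' LIST calls against the API server fail; check the deployed monitoring-prometheus-k8s-rules.yaml for the exact expression and threshold in your version:

alert: KubeStateMetricsListErrors
expr: (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) / sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m]))) > 0.01
for: 15m
labels:
  severity: critical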
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-apps
alert: KubeJobFailed
expr: kube_job_failed{job="kube-state-metrics"} > 0
for: 15m
labels:
  severity: warning
annotations:
  message: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
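kube_job_failed reflects the state of the Job object, so this alert keeps firing for as long as a failed Job is left in the cluster. A common local tweak is to scope the expression, for example excluding a namespace whose Jobs are expected to fail and are handled elsewhere; a sketch, where the ci namespace is purely a placeholder:

alert: KubeJobFailed
expr: kube_job_failed{job="kube-state-metrics",namespace!="ci"} > 0   # "ci" is a placeholder namespace matcher
for: 15m
labels:
  severity: warning
annotations:
  message: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.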
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-resources
All nine CPUThrottlingHigh instances below carry namespace="monitoring" and severity="warning", plus container and pod labels matching the columns. Each message annotation reads "<value as a percentage> throttling of CPU in namespace monitoring for container <container> in pod <pod>." and every instance links the runbook https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh

Container         Pod                        State     Active Since                              Value
kube-rbac-proxy   arm-exporter-jzhqp         pending   2025-12-17 11:30:47.55260138 +0000 UTC    0.4268292682926829
grafana           grafana-594fc7f587-v7wlr   firing    2025-12-13 20:18:17.55260138 +0000 UTC    0.3807531380753138
kube-rbac-proxy   node-exporter-4xk97        pending   2025-12-17 11:37:47.55260138 +0000 UTC    0.26829268292682923
kube-rbac-proxy   arm-exporter-7w8n2         pending   2025-12-17 11:32:17.55260138 +0000 UTC    0.4761904761904762
kube-rbac-proxy   arm-exporter-gjg9w         pending   2025-12-17 11:38:17.55260138 +0000 UTC    0.3013698630136986
node-exporter     node-exporter-kgllz        firing    2025-11-05 12:48:47.55260138 +0000 UTC    0.7678571428571428
node-exporter     node-exporter-4xk97        firing    2025-11-05 12:50:17.55260138 +0000 UTC    0.5930232558139534
kube-rbac-proxy   node-exporter-t242n        pending   2025-12-17 11:28:47.55260138 +0000 UTC    0.3076923076923077
node-exporter     node-exporter-t242n        firing    2025-11-05 12:50:17.55260138 +0000 UTC    0.6788990825688073
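CPUThrottlingHigh means a container is repeatedly hitting its CFS quota, i.e. its CPU limit; tiny sidecars such as kube-rbac-proxy do this easily on low-powered ARM nodes even when their average usage is low. The usual remediation is to raise, or remove, the CPU limit on the affected container. A sketch of the relevant resources stanza as it might be patched into e.g. the arm-exporter DaemonSet, with placeholder numbers:

spec:
  template:
    spec:
      containers:
      - name: kube-rbac-proxy
        resources:
          requests:
            cpu: 20m            # placeholder request
          limits:
            cpu: 100m           # placeholder limit; raise it, or drop limits entirely, to stop throttling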
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-storage
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-system
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-system-apiserver
alert: AggregatedAPIErrors
expr: sum by(name, namespace) (increase(aggregator_unavailable_apiservice_count[5m])) > 2
labels:
  severity: warning
annotations:
  message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors

alert: KubeAPIDown
expr: absent(up{job="apiserver"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  message: KubeAPI has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
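KubeAPIDown, like KubeSchedulerDown and KubeControllerManagerDown below, relies on the absent() pattern: when a target vanishes from service discovery there is no up series at all, so up == 0 can never fire, whereas absent(up{...} == 1) returns 1 exactly when no healthy target is visible. The same pattern applied to a hypothetical scrape job, as a sketch:

alert: MyAppDown                          # "my-app" is a hypothetical job name
expr: absent(up{job="my-app"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  message: No healthy "my-app" target has been visible to Prometheus for 15 minutes.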
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-system-controller-manager
Labels:       alertname="KubeControllerManagerDown", severity="critical"
State:        firing
Active Since: 2023-12-18 16:25:14 +0000 UTC
Value:        1
Annotations:
  message: KubeControllerManager has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
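This alert, together with the TargetDown entry for kube-controller-manager-prometheus-discovery under general.rules above, says that Prometheus cannot scrape the controller manager at all. On clusters where the headless discovery Service expected by the kube-prometheus ServiceMonitor is missing (or where the distribution does not expose controller-manager metrics), one fix is to create it. A sketch only; the selector and port are cluster-dependent (kubeadm labels the static pod component: kube-controller-manager, and newer control planes serve metrics over HTTPS on 10257 rather than HTTP on 10252):

apiVersion: v1
kind: Service
metadata:
  name: kube-controller-manager-prometheus-discovery
  namespace: kube-system
  labels:
    k8s-app: kube-controller-manager      # must match the ServiceMonitor's label selector
spec:
  clusterIP: None
  selector:
    component: kube-controller-manager    # kubeadm convention; adjust for your distribution
  ports:
  - name: https-metrics                   # the port name must match the ServiceMonitor endpoint
    port: 10257                           # 10252 (HTTP) on older control planes
    targetPort: 10257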
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-system-kubelet
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > kubernetes-system-scheduler
alert: KubeSchedulerDown
expr: absent(up{job="kube-scheduler"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  message: KubeScheduler has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > node-exporter
alert: NodeClockNotSynchronising
expr: min_over_time(node_timex_sync_status[5m]) == 0
for: 10m
labels:
  severity: warning
annotations:
  message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
  summary: Clock not synchronising.
alert: NodeNetworkReceiveErrs
expr: increase(node_network_receive_errs_total[2m]) > 10
for: 1h
labels:
  severity: warning
annotations:
  description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs
  summary: Network interface is reporting many receive errors.

alert: NodeNetworkTransmitErrs
expr: increase(node_network_transmit_errs_total[2m]) > 10
for: 1h
labels:
  severity: warning
annotations:
  description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs
  summary: Network interface is reporting many transmit errors.
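node_exporter exposes drop counters alongside these error counters (node_network_receive_drop_total and node_network_transmit_drop_total), so the same increase()-over-two-minutes pattern extends to dropped packets. A hypothetical companion rule, reusing the thresholds from the rules above:

alert: NodeNetworkReceiveDrops            # hypothetical rule, not part of the dump above
expr: increase(node_network_receive_drop_total[2m]) > 10
for: 1h
labels:
  severity: warning
annotations:
  description: '{{ $labels.instance }} interface {{ $labels.device }} has dropped {{ printf "%.0f" $value }} received packets in the last two minutes.'
  summary: Network interface is dropping received packets.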
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > node-network
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > prometheus
/etc/prometheus/rules/prometheus-k8s-rulefiles-0/monitoring-prometheus-k8s-rules.yaml > prometheus-operator