Rule | State | Error | Last Evaluation | Evaluation Time |
alert: InstanceDown
expr: up == 0
for: 10s
labels:
severity: critical
annotations:
description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 10 seconds.'
summary: Endpoint {{ $labels.instance }} down
| ok | | 50.123s ago | 831.8us |
alert: PrometheusConfigurationReloadFailure
expr: prometheus_config_last_reload_successful != 1
for: 1m
labels:
severity: warning
annotations:
description: |-
Prometheus configuration reload error
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Prometheus configuration reload failure (instance {{ $labels.instance }})
| ok | | 50.122s ago | 144.4us |
alert: PrometheusTooManyRestarts
expr: changes(process_start_time_seconds{job=~"prometheus|pushgateway|alertmanager"}[15m]) > 2
for: 5m
labels:
severity: warning
annotations:
description: |-
Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping.
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Prometheus too many restarts (instance {{ $labels.instance }})
| ok | | 50.122s ago | 250.5us |
alert: HostMemoryUnderMemoryPressure
expr: rate(node_vmstat_pgmajfault[2m]) > 8
labels:
severity: warning
annotations:
description: |-
The node is under heavy memory pressure. High rate of major page faults
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Host memory under memory pressure (instance {{ $labels.instance }})
| ok | | 50.122s ago | 173.6us |
alert: HostOutOfMemory
expr: (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 < 30
labels:
severity: warning
annotations:
description: |-
Node memory is filling up (< 30% left)
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Host out of memory (instance {{ $labels.instance }})
| ok | | 50.123s ago | 304.7us |
alert: HostOutOfMemoryCritical
expr: (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 < 10
labels:
severity: critical
annotations:
description: |-
Node memory is filling up (< 10% left)
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Host critically out of memory (instance {{ $labels.instance }})
| ok | | 50.123s ago | 271.5us |
alert: HostUnusualNetworkThroughputIn
expr: (sum by(instance) (irate(node_network_receive_bytes_total[2m]))) / (1024 * 1024) > 1
for: 2m
labels:
severity: warning
annotations:
description: |-
Host network interfaces are probably receiving too much data (> 1 MB/s)
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Host unusual network throughput in (instance {{ $labels.instance }})
| ok | | 50.123s ago | 962.9us |
alert: HostUnusualNetworkThroughputOut
expr: (sum by(instance) (irate(node_network_transmit_bytes_total[2m]))) / (1024 * 1024) > 1
for: 2m
labels:
severity: warning
annotations:
description: |-
Host network interfaces are probably sending too much data (> 1 MB/s)
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Host unusual network throughput out (instance {{ $labels.instance }})
| ok | | 50.122s ago | 911.3us |
alert: HostOutOfDiskSpace
expr: (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < 30
labels:
severity: warning
annotations:
description: |-
Disk is almost full (< 30% left)
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Host out of disk space (instance {{ $labels.instance }})
| ok | | 50.121s ago | 291.3us |
alert: HostDiskWillFillIn48Hours
expr: predict_linear(node_filesystem_avail_bytes[1h], 48 * 3600) < 0
for: 5m
labels:
severity: warning
annotations:
description: |-
Disk will fill in 48 hours at current write rate
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Host disk will fill in 48 hours (instance {{ $labels.instance }})
| ok | | 50.121s ago | 198.7us |
alert: HostOutOfDiskInodes
expr: (node_filesystem_files_free / node_filesystem_files) * 100 < 50
labels:
severity: warning
annotations:
description: |-
Disk is running low on free inodes (< 50% left)
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Host out of disk inodes (instance {{ $labels.instance }})
| ok | | 50.121s ago | 306us |
alert: HostHighCpuLoad
expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 60
labels:
severity: warning
annotations:
description: |-
CPU load is > 60%
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Host high CPU load (instance {{ $labels.instance }})
| ok | | 50.121s ago | 346.1us |
alert: containerStatusLastTerminatedReason
expr: increase(kube_pod_container_status_last_terminated_reason[1d]) > 0
labels:
severity: warning
annotations:
description: |-
A container has terminated within the last 24 hours.
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Container {{ $labels.instance }} down
| ok | | 50.121s ago | 69.73ms |
alert: RaftLeaderChanges
expr: changes(consensus_etcdraft_is_leader[2h]) > 0
labels:
severity: warning
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: Raft leader changed (instance {{ $labels.instance }})
| ok | | 50.051s ago | 1.336ms |
alert: ledgerTransactionCountPerMinuteWarning
expr: rate(ledger_transaction_count[1m]) * 60 > 16
labels:
severity: warning
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: ledger_transaction_count more than 16 per minute (instance {{ $labels.instance }})
| ok | | 50.05s ago | 396.2us |
alert: ledgerTransactionCountPerMinuteCritical
expr: rate(ledger_transaction_count[1m]) * 60 > 60
labels:
severity: critical
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: ledger_transaction_count more than 60 per minute (instance {{ $labels.instance }})
| ok | | 50.049s ago | 445.6us |
alert: chaincodeExecuteTimeouts
expr: rate(chaincode_execute_timeouts[30s]) * 30 > 0
labels:
severity: critical
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: chaincodeExecuteTimeouts per minute (instance {{ $labels.instance }})
| ok | | 50.049s ago | 182.3us |
alert: chaincodeLaunchFailures
expr: rate(chaincode_launch_failures[30s]) * 30 > 0
labels:
severity: critical
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: chaincodeLaunchFailures per minute (instance {{ $labels.instance }})
| ok | | 50.049s ago | 487.4us |
alert: chaincodeLaunchTimeouts
expr: rate(chaincode_launch_timeouts[30s]) * 30 > 0
labels:
severity: critical
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: chaincodeLaunchTimeouts per minute (instance {{ $labels.instance }})
| ok | | 50.049s ago | 137.4us |
alert: endorserChaincodeInstantiationFailures
expr: rate(endorser_chaincode_instantiation_failures[30s]) * 30 > 0
labels:
severity: critical
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: endorserChaincodeInstantiationFailures (instance {{ $labels.instance }})
| ok | | 50.049s ago | 343.1us |
alert: endorserDuplicateTransactionFailures
expr: rate(endorser_duplicate_transaction_failures[30s]) * 30 > 0
labels:
severity: critical
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: endorserDuplicateTransactionFailures (instance {{ $labels.instance }})
| ok | | 50.048s ago | 193us |
alert: endorserEndorsementFailures
expr: rate(endorser_endorsement_failures[30s]) * 30 > 0
labels:
severity: critical
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: endorser_endorsement_failures (instance {{ $labels.instance }})
| ok | | 50.048s ago | 146us |
alert: endorserProposalAclFailures
expr: rate(endorser_proposal_acl_failures[30s]) * 30 > 0
labels:
severity: critical
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: endorserProposalAclFailures (instance {{ $labels.instance }})
| ok | | 50.049s ago | 199.6us |
alert: endorserProposalValidationFailures
expr: rate(endorser_proposal_validation_failures[30s]) * 30 > 0
labels:
severity: critical
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: endorserProposalValidationFailures (instance {{ $labels.instance }})
| ok | | 50.049s ago | 171us |
alert: peerMemoryUsage
expr: container_memory_working_set_bytes{container="peer",pod=~"peer.*"} / (1204 * 1024) > 600
labels:
severity: medium
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: peerMemoryUsage greater than 600 MB (instance {{ $labels.instance }})
| ok | | 50.049s ago | 304.8us |
alert: couchDbMemoryUsage
expr: container_memory_working_set_bytes{container="couchdb",pod=~"peer.*"} / (1204 * 1024) > 1200
labels:
severity: medium
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: couchDbMemoryUsage greater than 1200 MB (instance {{ $labels.instance }})
| ok | | 50.049s ago | 266.5us |
alert: ordererMemoryUsage
expr: container_memory_working_set_bytes{container=~"orderer."} / (1204 * 1024) > 100
labels:
severity: medium
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: ordererMemoryUsage greater than 100 MB (instance {{ $labels.instance }})
| ok | | 50.048s ago | 588.1us |
alert: chainCodeMemoryUsage
expr: container_memory_working_set_bytes{name=~"pr.-peer..pr..gouze.io-.*"} / (1024 * 1024) > 25
labels:
severity: medium
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: chainCodeMemoryUsage greater than 25 MB (instance {{ $labels.instance }})
| ok | | 50.048s ago | 452.3us |
alert: fabricUsedDisk
expr: kubelet_volume_stats_used_bytes{persistentvolumeclaim=~"production-.*|database-.*"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim=~"production-.*|database-.*"} * 100 > 60
labels:
severity: medium
annotations:
description: |2-
VALUE = {{ $value }}
LABELS: {{ $labels }}
summary: fabricUsedDisk greater than 60% (instance {{ $labels.instance }})
| ok | | 50.048s ago | 1.346ms |
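The rule definitions shown in this table normally live in a Prometheus rule-group file that prometheus.yml loads through rule_files. A minimal sketch reusing the InstanceDown rule from the table is given below; the file name and group name are assumptions chosen for illustration, not taken from the deployment above.

# fabric-alerts.rules.yml -- hypothetical file name
groups:
  - name: instance-health          # hypothetical group name
    rules:
      - alert: InstanceDown
        expr: up == 0
        for: 10s
        labels:
          severity: critical
        annotations:
          summary: Endpoint {{ $labels.instance }} down
          description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 10 seconds.'

# prometheus.yml -- only the stanza that loads the rule file
rule_files:
  - fabric-alerts.rules.yml

Such a file can be sanity-checked with promtool check rules fabric-alerts.rules.yml before reloading; if a reload ever fails and Prometheus keeps running on its previous configuration, the PrometheusConfigurationReloadFailure rule above will fire.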