fix(infra): alert policy aggregation — drop cross_series_reducer
GCP rejected the policy with 'REDUCE_COUNT_FALSE cannot be applied to metrics with value type DOUBLE'. The cause: ALIGN_FRACTION_TRUE already converts each series into a DOUBLE fraction in the range 0..1, so REDUCE_COUNT_FALSE cannot be applied on top of it, and no additional cross-series reducer is needed anyway. Simplified: alert when the per-series fraction stays < 1 for 5 min. Review M4 predicted this — uptime check filters needed double-checking against live GCP.
This commit is contained in:
parent
4ab0838ba2
commit
9a99a82e92
1 changed file with 5 additions and 5 deletions
|
|
@@ -289,16 +289,16 @@ resource "google_monitoring_alert_policy" "health_failure" {
|
|||
conditions {
|
||||
display_name = "Uptime check failed > 5 min"
|
||||
condition_threshold {
|
||||
filter = "metric.type=\"monitoring.googleapis.com/uptime_check/check_passed\" AND metric.labels.check_id=\"${google_monitoring_uptime_check_config.health[each.key].uptime_check_id}\" AND resource.type=\"uptime_url\""
|
||||
filter = "metric.type=\"monitoring.googleapis.com/uptime_check/check_passed\" AND metric.labels.check_id=\"${google_monitoring_uptime_check_config.health[each.key].uptime_check_id}\" AND resource.type=\"uptime_url\""
|
||||
duration = "300s"
|
||||
# ALIGN_FRACTION_TRUE yields fraction of checks that returned true.
|
||||
# If the fraction stays < 1 (i.e. any probe failed) for 5 min → alert.
|
||||
comparison = "COMPARISON_LT"
|
||||
threshold_value = 1
|
||||
|
||||
aggregations {
|
||||
alignment_period = "60s"
|
||||
per_series_aligner = "ALIGN_FRACTION_TRUE"
|
||||
cross_series_reducer = "REDUCE_COUNT_FALSE"
|
||||
group_by_fields = ["resource.label.host"]
|
||||
alignment_period = "60s"
|
||||
per_series_aligner = "ALIGN_FRACTION_TRUE"
|
||||
}
|
||||
|
||||
trigger {
|
||||
|
|
|
|||
Loading…
Reference in a new issue