Skip to content

Commit 85cf7fe

Browse files
committed
updated monitors to 0.5.1
1 parent 1dedbb9 commit 85cf7fe

14 files changed

+175
-132
lines changed

daemonset-incomplete-variables.tf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,11 @@ variable "daemonset_incomplete_name_prefix" {
6262
variable "daemonset_incomplete_name_suffix" {
6363
type = string
6464
default = ""
65+
}
66+
67+
variable "daemonset_incomplete_priority" {
68+
description = "Number from 1 (high) to 5 (low)."
69+
70+
type = number
71+
default = null
6572
}

daemonset-incomplete.tf

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,27 @@ locals {
66
}
77

88
module "daemonset_incomplete" {
9-
source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5"
10-
11-
name = "Daemonset Incomplete"
12-
query = "min(${var.daemonset_incomplete_evaluation_period}):max:kubernetes_state.daemonset.scheduled{${local.daemonset_incomplete_filter}} by {daemonset,cluster_name} - min:kubernetes_state.daemonset.ready{${local.daemonset_incomplete_filter}} by {daemonset,cluster_name} > 0"
13-
14-
enabled = var.daemonset_incomplete_enabled
15-
alerting_enabled = var.daemonset_incomplete_alerting_enabled
9+
source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5.1"
1610

11+
name = "Daemonset Incomplete"
12+
query = "min(${var.daemonset_incomplete_evaluation_period}):max:kubernetes_state.daemonset.scheduled{${local.daemonset_incomplete_filter}} by {daemonset,cluster_name} - min:kubernetes_state.daemonset.ready{${local.daemonset_incomplete_filter}} by {daemonset,cluster_name} > 0"
1713
alert_message = "Kubernetes Daemonset {{daemonset}} is incomplete. Missing pod count:{{value}}"
1814
recovery_message = "Kubernetes Daemonset {{daemonset}} has recovered"
1915

20-
service = var.service
21-
env = var.alert_env
22-
severity = var.daemonset_incomplete_severity
23-
note = var.daemonset_incomplete_note
24-
docs = var.daemonset_incomplete_docs
25-
additional_tags = var.additional_tags
26-
27-
notification_channel = var.notification_channel
28-
29-
require_full_window = true
30-
16+
# monitor level vars
17+
enabled = var.daemonset_incomplete_enabled
18+
alerting_enabled = var.daemonset_incomplete_alerting_enabled
3119
critical_threshold = var.daemonset_incomplete_critical
32-
locked = var.locked
20+
# no warning threshold for this monitor
21+
priority = var.daemonset_incomplete_priority
22+
severity = var.daemonset_incomplete_severity
23+
docs = var.daemonset_incomplete_docs
24+
note = var.daemonset_incomplete_note
25+
26+
# module level vars
27+
env = var.alert_env
28+
service = var.service
29+
notification_channel = var.notification_channel
30+
additional_tags = var.additional_tags
31+
locked = var.locked
3332
}

hpa-status-variables.tf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,11 @@ variable "hpa_status_name_prefix" {
5656
variable "hpa_status_name_suffix" {
5757
type = string
5858
default = ""
59+
}
60+
61+
variable "hpa_status_priority" {
62+
description = "Number from 1 (high) to 5 (low)."
63+
64+
type = number
65+
default = null
5966
}

hpa-status.tf

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,28 +7,28 @@ locals {
77
}
88

99
module "hpa_status" {
10-
source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5"
11-
12-
name = "HPA Status not OK"
13-
query = "avg(${var.hpa_status_evaluation_period}):avg:kubernetes_state.hpa.condition{${local.hpa_status_filter}} by {hpa,kube_namespace,status,condition} < 1"
14-
15-
enabled = var.hpa_status_enabled
16-
alerting_enabled = var.hpa_status_alerting_enabled
10+
source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5.1"
1711

12+
name = "HPA Status not OK"
13+
query = "avg(${var.hpa_status_evaluation_period}):avg:kubernetes_state.hpa.condition{${local.hpa_status_filter}} by {hpa,kube_namespace,status,condition} < 1"
1814
alert_message = "Kubernetes HPA Status for Node {{node}} is not ok"
1915
recovery_message = "Kubernetes HPA Status for Node {{node}} has recovered"
2016

21-
service = var.service
22-
env = var.alert_env
23-
severity = var.hpa_status_severity
24-
note = var.hpa_status_note
25-
docs = var.hpa_status_docs
26-
additional_tags = var.additional_tags
27-
28-
notification_channel = var.notification_channel
29-
30-
require_full_window = false
3117

18+
# monitor level vars
19+
enabled = var.hpa_status_enabled
20+
alerting_enabled = var.hpa_status_alerting_enabled
3221
critical_threshold = 1
33-
locked = var.locked
22+
# No warning_threshold possible
23+
priority = var.hpa_status_priority
24+
severity = var.hpa_status_severity
25+
docs = var.hpa_status_docs
26+
note = var.hpa_status_note
27+
28+
# module level vars
29+
env = var.alert_env
30+
service = var.service
31+
notification_channel = var.notification_channel
32+
additional_tags = var.additional_tags
33+
locked = var.locked
3434
}

node-memory-used-percent-variables.tf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,11 @@ variable "node_memory_used_percent_name_prefix" {
6868
variable "node_memory_used_percent_name_suffix" {
6969
type = string
7070
default = ""
71+
}
72+
73+
variable "node_memory_used_percent_priority" {
74+
description = "Number from 1 (high) to 5 (low)."
75+
76+
type = number
77+
default = null
7178
}

node-memory-used-percent.tf

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,29 +6,27 @@ locals {
66
}
77

88
module "node_memory_used_percent" {
9-
source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5"
10-
11-
name = "Memory Used Percent"
12-
query = "avg(${var.node_memory_used_percent_evaluation_period}):( 100 * max:kubernetes.memory.usage{${local.node_memory_used_percent_filter}} by {host,cluster_name} ) / max:kubernetes.memory.capacity{${local.node_memory_used_percent_filter}} by {host,cluster_name} > ${var.node_memory_used_percent_critical}"
13-
14-
enabled = var.node_memory_used_percent_enabled
15-
alerting_enabled = var.node_memory_used_percent_alerting_enabled
9+
source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5.1"
1610

11+
name = "Memory Used Percent"
12+
query = "avg(${var.node_memory_used_percent_evaluation_period}):( 100 * max:kubernetes.memory.usage{${local.node_memory_used_percent_filter}} by {host,cluster_name} ) / max:kubernetes.memory.capacity{${local.node_memory_used_percent_filter}} by {host,cluster_name} > ${var.node_memory_used_percent_critical}"
1713
alert_message = "Available memory on ${var.service} Node {{host.name}} has dropped below {{threshold}} and has {{value}}% available"
1814
recovery_message = "Available memory on ${var.service} Node {{host.name}} has recovered {{value}}%"
1915

20-
service = var.service
21-
env = var.alert_env
22-
severity = var.node_memory_used_percent_severity
23-
note = var.node_memory_used_percent_note
24-
docs = var.node_memory_used_percent_docs
25-
additional_tags = var.additional_tags
26-
27-
notification_channel = var.notification_channel
28-
29-
require_full_window = true
30-
16+
# monitor level vars
17+
enabled = var.node_memory_used_percent_enabled
18+
alerting_enabled = var.node_memory_used_percent_alerting_enabled
3119
critical_threshold = var.node_memory_used_percent_critical
3220
warning_threshold = var.node_memory_used_percent_warning
33-
locked = var.locked
21+
priority = var.node_memory_used_percent_priority
22+
severity = var.node_memory_used_percent_severity
23+
docs = var.node_memory_used_percent_docs
24+
note = var.node_memory_used_percent_note
25+
26+
# module level vars
27+
env = var.alert_env
28+
service = var.service
29+
notification_channel = var.notification_channel
30+
additional_tags = var.additional_tags
31+
locked = var.locked
3432
}

node-status-variables.tf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,11 @@ variable "node_status_name_prefix" {
5656
variable "node_status_name_suffix" {
5757
type = string
5858
default = ""
59+
}
60+
61+
variable "node_status_priority" {
62+
description = "Number from 1 (high) to 5 (low)."
63+
64+
type = number
65+
default = null
5966
}

node-status.tf

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,28 @@ locals {
66
}
77

88
module "node_status" {
9-
source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5"
10-
11-
name = "Node Status not OK"
12-
query = "avg(${var.node_status_evaluation_period}):avg:kubernetes_state.node.status{${local.node_status_filter}} by {cluster_name,node} < 1"
13-
14-
enabled = var.node_status_enabled
15-
alerting_enabled = var.node_status_alerting_enabled
16-
17-
alert_message = "Kubernetes Node Status for Node {{node}} is not ok"
18-
recovery_message = "Kubernetes Node Status for Node {{node}} has recovered"
19-
20-
service = var.service
21-
env = var.alert_env
22-
severity = var.node_status_severity
23-
note = var.node_status_note
24-
docs = var.node_status_docs
25-
additional_tags = var.additional_tags
26-
27-
notification_channel = var.notification_channel
9+
source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5.1"
2810

11+
name = "Node Status not OK"
12+
query = "avg(${var.node_status_evaluation_period}):avg:kubernetes_state.node.status{${local.node_status_filter}} by {cluster_name,node} < 1"
13+
alert_message = "Kubernetes Node Status for Node {{node}} is not ok"
14+
recovery_message = "Kubernetes Node Status for Node {{node}} has recovered"
2915
require_full_window = false
3016

17+
# monitor level vars
18+
enabled = var.node_status_enabled
19+
alerting_enabled = var.node_status_alerting_enabled
3120
critical_threshold = 1
32-
locked = var.locked
21+
# No warning possible for status that is either 0 or 1
22+
priority = var.node_status_priority
23+
severity = var.node_status_severity
24+
docs = var.node_status_docs
25+
note = var.node_status_note
26+
27+
# module level vars
28+
env = var.alert_env
29+
service = var.service
30+
notification_channel = var.notification_channel
31+
additional_tags = var.additional_tags
32+
locked = var.locked
3333
}

pod-ready-variables.tf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,11 @@ variable "pod_ready_name_prefix" {
5656
variable "pod_ready_name_suffix" {
5757
type = string
5858
default = ""
59+
}
60+
61+
variable "pod_ready_priority" {
62+
description = "Number from 1 (high) to 5 (low)."
63+
64+
type = number
65+
default = null
5966
}

pod-ready.tf

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,27 @@ locals {
66
}
77

88
module "pod_ready" {
9-
source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5"
10-
11-
name = "Pod status not ready"
12-
query = "min(${var.pod_ready_evaluation_period}):sum:kubernetes_state.pod.count{${local.pod_ready_filter}} by {cluster_name,namespace} - sum:kubernetes_state.pod.ready{${local.pod_ready_filter}} by {cluster_name,namespace} > 0"
13-
14-
enabled = var.pod_ready_enabled
15-
alerting_enabled = var.pod_ready_alerting_enabled
9+
source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5.1"
1610

11+
name = "Pod status not ready"
12+
query = "min(${var.pod_ready_evaluation_period}):sum:kubernetes_state.pod.count{${local.pod_ready_filter}} by {cluster_name,namespace} - sum:kubernetes_state.pod.ready{${local.pod_ready_filter}} by {cluster_name,namespace} > 0"
1713
alert_message = "Kubernetes Pod {{value}} status not ready in namespace {{namespace}} "
1814
recovery_message = "Kubernetes Pod status recovered in namespace {{namespace}}"
1915

20-
service = var.service
21-
env = var.alert_env
22-
severity = var.pod_ready_severity
23-
note = var.pod_ready_note
24-
docs = var.pod_ready_docs
25-
additional_tags = var.additional_tags
26-
27-
notification_channel = var.notification_channel
28-
29-
require_full_window = true
30-
16+
# monitor level vars
17+
enabled = var.pod_ready_enabled
18+
alerting_enabled = var.pod_ready_alerting_enabled
3119
critical_threshold = 0
32-
locked = var.locked
20+
# no warning threshold for this monitor (query is a count of not-ready pods, alerting on any value above 0)
21+
priority = var.pod_ready_priority
22+
severity = var.pod_ready_severity
23+
docs = var.pod_ready_docs
24+
note = var.pod_ready_note
25+
26+
# module level vars
27+
env = var.alert_env
28+
service = var.service
29+
notification_channel = var.notification_channel
30+
additional_tags = var.additional_tags
31+
locked = var.locked
3332
}

0 commit comments

Comments
 (0)