NEW: EKS monitoring
This commit is contained in:
parent
9a3ec387a9
commit
bcdbb23221
27
modules/ManagementGovernance/Monitoring.EKS/README.md
Normal file
27
modules/ManagementGovernance/Monitoring.EKS/README.md
Normal file
@ -0,0 +1,27 @@
|
||||
# Monitoring module
|
||||
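This module deploys the default CloudWatch metric alarms for EKS pods: CPU utilization, memory utilization and number of container restarts. The metrics come from the `ContainerInsights` namespace, so Container Insights must be enabled on the cluster.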
|
||||
|
||||
## Notes
|
||||
The Terraform `lifecycle` block ignores tag changes to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be read in the AWS console anyway.
|
||||
Unlike the other monitoring modules, which discover resource details automatically, this module needs the EKS pod names to be supplied explicitly.
|
||||
The AWS CLI does not provide pod information.
|
||||
|
||||
## Example
|
||||
```terraform
# Look up the names of the EKS clusters.
data "aws_eks_clusters" "eks-clusters" {}

# One instance of the monitoring module per cluster name.
module "eks-monitoring" {
  source   = "../../modules/ManagementGovernance/Monitoring.EKS"
  for_each = data.aws_eks_clusters.eks-clusters.names

  # What to monitor: pod names must be listed explicitly.
  cluster-name  = each.value
  eks-namespace = "default"
  pod-names     = ["depl-nginx", "depl-alpine"]

  # Alarm thresholds.
  threshold-pod_cpu_utilization              = 85
  threshold-pod_memory_utilization           = 85
  threshold-pod_number_of_container_restarts = 5

  # Naming, tagging and notification settings.
  cw-alarm-prefix = local.cw-alarm-prefix
  default-tags    = local.default-tags
  actions-enabled = var.actions-enabled
  sns-targets     = local.sns-targets
}
```
|
81
modules/ManagementGovernance/Monitoring.EKS/main.tf
Normal file
81
modules/ManagementGovernance/Monitoring.EKS/main.tf
Normal file
@ -0,0 +1,81 @@
|
||||
// The following checks require Container Insights to be enabled on the cluster.
|
||||
|
||||
// CPU utilization alarm, one per entry in var.pod-names, on the
// ContainerInsights metric "pod_cpu_utilization".
//
// The alarm compares the 300-second Average against
// var.threshold-pod_cpu_utilization over 3 evaluation periods. Both the
// ALARM and the OK actions point at var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "eks-pod_cpu_utilization" {
  for_each = toset(var.pod-names)

  // CloudWatch alarm names must be unique within a Region. The cluster and
  // namespace are part of the name so that instantiating this module for
  // several clusters (or namespaces) with the same pod names does not make
  // the alarms overwrite each other.
  alarm_name        = "${var.cw-alarm-prefix}:EKS:pod_cpu_utilization:${var.cluster-name}/${var.eks-namespace}/${each.value}"
  alarm_description = "EKS:pod_cpu_utilization"

  namespace   = "ContainerInsights"
  metric_name = "pod_cpu_utilization"
  dimensions = {
    "PodName"     = each.value
    "ClusterName" = var.cluster-name
    "Namespace"   = var.eks-namespace
  }

  comparison_operator = "GreaterThanThreshold"
  statistic           = "Average"
  period              = 300
  evaluation_periods  = 3
  threshold           = var.threshold-pod_cpu_utilization

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  // Tag drift is ignored on purpose so later plans do not touch the alarm
  // just because the tags changed.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||||
|
||||
// Memory utilization alarm, one per entry in var.pod-names, on the
// ContainerInsights metric "pod_memory_utilization".
//
// The alarm compares the 300-second Average against
// var.threshold-pod_memory_utilization over 3 evaluation periods. Both the
// ALARM and the OK actions point at var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "eks-pod_memory_utilization" {
  for_each = toset(var.pod-names)

  // CloudWatch alarm names must be unique within a Region. The cluster and
  // namespace are part of the name so that instantiating this module for
  // several clusters (or namespaces) with the same pod names does not make
  // the alarms overwrite each other.
  alarm_name        = "${var.cw-alarm-prefix}:EKS:pod_memory_utilization:${var.cluster-name}/${var.eks-namespace}/${each.value}"
  alarm_description = "EKS:pod_memory_utilization"

  namespace   = "ContainerInsights"
  metric_name = "pod_memory_utilization"
  dimensions = {
    "PodName"     = each.value
    "ClusterName" = var.cluster-name
    "Namespace"   = var.eks-namespace
  }

  comparison_operator = "GreaterThanThreshold"
  statistic           = "Average"
  period              = 300
  evaluation_periods  = 3
  threshold           = var.threshold-pod_memory_utilization

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  // Tag drift is ignored on purpose so later plans do not touch the alarm
  // just because the tags changed.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||||
|
||||
// Container restart alarm, one per entry in var.pod-names, on the
// ContainerInsights metric "pod_number_of_container_restarts".
//
// The alarm compares the 300-second Average against
// var.threshold-pod_number_of_container_restarts over 3 evaluation periods.
// Both the ALARM and the OK actions point at
// var.sns-targets.alarm-actions-urgent.
//
// NOTE(review): the statistic is Average, same as the utilization alarms.
// Confirm that Average (rather than Sum or Maximum) is what is wanted for a
// restart count.
resource "aws_cloudwatch_metric_alarm" "eks-pod_number_of_container_restarts" {
  for_each = toset(var.pod-names)

  // CloudWatch alarm names must be unique within a Region. The cluster and
  // namespace are part of the name so that instantiating this module for
  // several clusters (or namespaces) with the same pod names does not make
  // the alarms overwrite each other.
  alarm_name        = "${var.cw-alarm-prefix}:EKS:pod_number_of_container_restarts:${var.cluster-name}/${var.eks-namespace}/${each.value}"
  alarm_description = "EKS:pod_number_of_container_restarts"

  namespace   = "ContainerInsights"
  metric_name = "pod_number_of_container_restarts"
  dimensions = {
    "PodName"     = each.value
    "ClusterName" = var.cluster-name
    "Namespace"   = var.eks-namespace
  }

  comparison_operator = "GreaterThanThreshold"
  statistic           = "Average"
  period              = 300
  evaluation_periods  = 3
  threshold           = var.threshold-pod_number_of_container_restarts

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  // Tag drift is ignored on purpose so later plans do not touch the alarm
  // just because the tags changed.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
9
modules/ManagementGovernance/Monitoring.EKS/provider.tf
Normal file
9
modules/ManagementGovernance/Monitoring.EKS/provider.tf
Normal file
@ -0,0 +1,9 @@
|
||||
# Terraform and provider version constraints for this module.
terraform {
  # "~>" lets only the right-most version component grow, so this accepts
  # Terraform 1.3.x releases.
  required_version = "~> 1.3.0"

  required_providers {
    aws = {
      # Accepts 4.36.1 and later 4.36.x patch releases of the AWS provider.
      version = "~> 4.36.1"
      source  = "hashicorp/aws"
    }
  }
}
|
13
modules/ManagementGovernance/Monitoring.EKS/variables.tf
Normal file
13
modules/ManagementGovernance/Monitoring.EKS/variables.tf
Normal file
@ -0,0 +1,13 @@
|
||||
# Input variables of the EKS pod monitoring module.

variable "cw-alarm-prefix" {
  description = "Prefix placed at the start of every CloudWatch alarm name created by this module."
  type        = string
}

variable "actions-enabled" {
  description = "Passed to actions_enabled on every alarm; controls whether the alarm actions are executed."
  type        = bool
}

# Left untyped so any object or map shape the caller already passes keeps working.
variable "sns-targets" {
  description = "Notification targets. This module reads the `alarm-actions-urgent` attribute and uses it for both alarm_actions and ok_actions."
}

variable "default-tags" {
  description = "Tags applied to every alarm. Later tag changes are ignored by the resources' lifecycle settings."
  type        = map(string)
}

variable "cluster-name" {
  description = "EKS cluster name, used as the ClusterName metric dimension."
  type        = string
}

variable "eks-namespace" {
  description = "Kubernetes namespace of the monitored pods, used as the Namespace metric dimension."
  type        = string
}

# The bare `list` keyword is a legacy form. The elements are used as for_each
# keys via toset(), so they are strings.
variable "pod-names" {
  description = "Pod names to monitor; one set of alarms is created per entry (PodName metric dimension)."
  type        = list(string)
}

variable "threshold-pod_cpu_utilization" {
  description = "Threshold for the pod_cpu_utilization alarm (GreaterThanThreshold)."
  type        = number
}

variable "threshold-pod_memory_utilization" {
  description = "Threshold for the pod_memory_utilization alarm (GreaterThanThreshold)."
  type        = number
}

variable "threshold-pod_number_of_container_restarts" {
  description = "Threshold for the pod_number_of_container_restarts alarm (GreaterThanThreshold)."
  type        = number
}
|
Loading…
Reference in New Issue
Block a user