UPD: various monitoring updates from upstream

This commit is contained in:
xpk 2023-07-04 08:16:09 +08:00
parent fb55cace53
commit e1373f2f62
Signed by: xpk
GPG Key ID: CD4FF6793F09AB86
28 changed files with 137 additions and 106 deletions

View File

@ -19,7 +19,6 @@ resource "aws_cloudwatch_metric_alarm" "alb-HTTPCode_ELB_5XX_Count" {
dimensions = { dimensions = {
LoadBalancer = local.alb-name LoadBalancer = local.alb-name
} }
tags = var.default-tags
} }
resource "aws_cloudwatch_metric_alarm" "alb-TargetConnectionErrorCount" { resource "aws_cloudwatch_metric_alarm" "alb-TargetConnectionErrorCount" {
@ -39,7 +38,6 @@ resource "aws_cloudwatch_metric_alarm" "alb-TargetConnectionErrorCount" {
dimensions = { dimensions = {
LoadBalancer = local.alb-name LoadBalancer = local.alb-name
} }
tags = var.default-tags
} }
resource "aws_cloudwatch_metric_alarm" "alb-TargetResponseTime" { resource "aws_cloudwatch_metric_alarm" "alb-TargetResponseTime" {
@ -59,7 +57,6 @@ resource "aws_cloudwatch_metric_alarm" "alb-TargetResponseTime" {
dimensions = { dimensions = {
LoadBalancer = local.alb-name LoadBalancer = local.alb-name
} }
tags = var.default-tags
} }
/* /*
@ -72,6 +69,7 @@ module "alb-targetgroups" {
*/ */
// causes Rate exceeded error, maybe because of adaptive AWS_RETRY_MODE? // causes Rate exceeded error, maybe because of adaptive AWS_RETRY_MODE?
/*
module "alb_tgs" { module "alb_tgs" {
assume_role_arn = var.asrolearn assume_role_arn = var.asrolearn
role_session_name = "terraform-resource-list" role_session_name = "terraform-resource-list"
@ -79,10 +77,19 @@ module "alb_tgs" {
aws_cli_commands = ["elbv2", "describe-target-groups", "--load-balancer-arn", var.load-balancer] aws_cli_commands = ["elbv2", "describe-target-groups", "--load-balancer-arn", var.load-balancer]
aws_cli_query = "TargetGroups[*].TargetGroupArn" aws_cli_query = "TargetGroups[*].TargetGroupArn"
} }
*/
# Lists the ARNs of the target groups attached to var.load-balancer by running
# an `elbv2 describe-target-groups` AWS CLI command through the shared awscli
# utility module, using the supplied target-account credentials.
module "alb_tgs" {
  source = "../../util/awscli"

  # CLI invocation; the --query expression keeps only each TargetGroupArn.
  aws_cli_commands = "elbv2 describe-target-groups --load-balancer-arn ${var.load-balancer} --query TargetGroups[*].TargetGroupArn"

  # Credentials passed through to the utility module.
  access_key    = var.target-account-ak
  secret_key    = var.target-account-sk
  session_token = var.target-account-token
}
resource "aws_cloudwatch_metric_alarm" "alb-HealthyHostCount" { resource "aws_cloudwatch_metric_alarm" "alb-HealthyHostCount" {
# for_each = module.alb-targetgroups.result-set # for_each = module.alb-targetgroups.result-set
for_each = toset(flatten(module.alb_tgs.result)) for_each = toset(module.alb_tgs.awscliout)
alarm_name = "${var.settings.HealthHostCountMin.ecccode}-ALBTG_:${split(":", each.value)[5]}-HealthyHostCount" alarm_name = "${var.settings.HealthHostCountMin.ecccode}-ALBTG_:${split(":", each.value)[5]}-HealthyHostCount"
comparison_operator = var.settings.HealthHostCountMin.comparison_operator comparison_operator = var.settings.HealthHostCountMin.comparison_operator
evaluation_periods = var.settings.HealthHostCountMin.evaluation_periods evaluation_periods = var.settings.HealthHostCountMin.evaluation_periods
@ -100,5 +107,4 @@ resource "aws_cloudwatch_metric_alarm" "alb-HealthyHostCount" {
TargetGroup = split(":", each.value)[5] TargetGroup = split(":", each.value)[5]
LoadBalancer = "app/${split("/", var.load-balancer)[2]}/${split("/", var.load-balancer)[3]}" LoadBalancer = "app/${split("/", var.load-balancer)[2]}/${split("/", var.load-balancer)[3]}"
} }
tags = var.default-tags
} }

View File

@ -1,4 +1,4 @@
output alb-tg-count { output alb-tg-count {
# value = length(module.alb-targetgroups.result-set) # value = length(module.alb-targetgroups.result-set)
value = length(flatten(module.alb_tgs.result)) value = length(flatten(module.alb_tgs.awscliout))
} }

View File

@ -2,5 +2,7 @@ variable cw-alarm-prefix {}
variable actions-enabled {} variable actions-enabled {}
variable load-balancer {} variable load-balancer {}
variable settings {} variable settings {}
variable default-tags {} # variable asrolearn {}
variable asrolearn {} variable target-account-ak {}
variable target-account-sk {}
variable target-account-token {}

View File

@ -19,7 +19,6 @@ resource "aws_cloudwatch_metric_alarm" "asg-CPUUtilization" {
dimensions = { dimensions = {
AutoScalingGroupName =var.asg-name AutoScalingGroupName =var.asg-name
} }
tags = var.default-tags
} }
resource "aws_cloudwatch_metric_alarm" "asg-GroupInServiceCapacity" { resource "aws_cloudwatch_metric_alarm" "asg-GroupInServiceCapacity" {
@ -39,5 +38,4 @@ resource "aws_cloudwatch_metric_alarm" "asg-GroupInServiceCapacity" {
dimensions = { dimensions = {
AutoScalingGroupName = var.asg-name AutoScalingGroupName = var.asg-name
} }
tags = var.default-tags
} }

View File

@ -2,5 +2,4 @@ variable cw-alarm-prefix {}
variable actions-enabled {} variable actions-enabled {}
variable asg-name {} variable asg-name {}
variable settings {} variable settings {}
variable default-tags {}
variable ecccode {} variable ecccode {}

View File

@ -1,7 +1,20 @@
#!/bin/bash #!/bin/bash
eval "$(jq -r '@sh "export id=\(.input) asrolearn=\(.asrolearn)"')" # Get the query
eval $(aws sts assume-role --role-arn $asrolearn --role-session-name awscli | jq -cr '"export AWS_ACCESS_KEY_ID=" + .Credentials.AccessKeyId, "export AWS_SECRET_ACCESS_KEY=" + .Credentials.SecretAccessKey, "export AWS_SESSION_TOKEN=" + .Credentials.SessionToken, "export AWS_SESSION_EXPIRATION=" + .Credentials.Expiration') TERRAFORM_QUERY=$(jq -Mc .)
# Extract the query attributes
access_key=$(echo "${TERRAFORM_QUERY}" | jq -r '.access_key')
secret_key=$(echo "${TERRAFORM_QUERY}" | jq -r '.secret_key')
session_token=$(echo "${TERRAFORM_QUERY}" | jq -r '.session_token')
iid=$(echo "${TERRAFORM_QUERY}" | jq -r '.iid')
# eval "$(jq -r '@sh "export id=\(.input) asrolearn=\(.asrolearn)"')"
# eval $(aws sts assume-role --role-arn $asrolearn --role-session-name awscli | jq -cr '"export AWS_ACCESS_KEY_ID=" + .Credentials.AccessKeyId, "export AWS_SECRET_ACCESS_KEY=" + .Credentials.SecretAccessKey, "export AWS_SESSION_TOKEN=" + .Credentials.SessionToken, "export AWS_SESSION_EXPIRATION=" + .Credentials.Expiration')
export AWS_ACCESS_KEY_ID=$access_key
export AWS_SECRET_ACCESS_KEY=$secret_key
export AWS_SESSION_TOKEN=$session_token
aws cloudwatch list-metrics --namespace CWAgent --metric-name disk_inodes_free \ aws cloudwatch list-metrics --namespace CWAgent --metric-name disk_inodes_free \
--dimensions Name=InstanceId,Value=$id Name=path,Value=/ | \ --dimensions Name=InstanceId,Value=$iid Name=path,Value=/ | \
jq '.Metrics[] | .Dimensions[] | {(.Name):(.Value)}' | jq -s 'add' jq '.Metrics[] | .Dimensions[] | {(.Name):(.Value)}' | jq -s 'add'

View File

@ -15,7 +15,6 @@ resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_System" {
dimensions = { dimensions = {
InstanceId = var.ec2-instance-id InstanceId = var.ec2-instance-id
} }
tags = var.default-tags
} }
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_Instance" { resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_Instance" {
@ -35,7 +34,6 @@ resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_Instance" {
dimensions = { dimensions = {
InstanceId = var.ec2-instance-id InstanceId = var.ec2-instance-id
} }
tags = var.default-tags
} }
resource "aws_cloudwatch_metric_alarm" "ec2-CPUUtilization" { resource "aws_cloudwatch_metric_alarm" "ec2-CPUUtilization" {
@ -56,7 +54,6 @@ resource "aws_cloudwatch_metric_alarm" "ec2-CPUUtilization" {
dimensions = { dimensions = {
InstanceId = var.ec2-instance-id InstanceId = var.ec2-instance-id
} }
tags = var.default-tags
} }
# cwagent metrics # cwagent metrics
@ -64,31 +61,28 @@ data "aws_instance" "ec2-instance" {
instance_id = var.ec2-instance-id instance_id = var.ec2-instance-id
} }
# get instance OS
/*
data "external" "ec2-os" {
program = ["bash", "${path.module}/get-os-platform.sh"]
query = {
input = var.ec2-instance-id
asrolearn = var.asrolearn
}
}
*/
module "ec2_os" { module "ec2_os" {
source = "../../util/terraform-aws-cli" source = "../../util/awscli"
assume_role_arn = var.asrolearn access_key = var.target-account-ak
role_session_name = "terraform-ec2-detect-os" aws_cli_commands = "ec2 describe-instances --instance-ids ${var.ec2-instance-id} --query Reservations[].Instances[].PlatformDetails"
aws_cli_commands = ["ec2", "describe-instances", "--instance-ids", var.ec2-instance-id] secret_key = var.target-account-sk
aws_cli_query = "Reservations[].Instances[].PlatformDetails" session_token = var.target-account-token
} }
# Linux specific checks # Linux specific checks
# default cw agent uses mem_used_percent metric # default cw agent uses mem_used_percent metric
# Detect the presence of the CloudWatch agent on the instance: ask CloudWatch
# for metric names in the CWAgent namespace that carry this InstanceId
# dimension. Callers treat a non-empty awscliout as "agent is reporting".
module "detect_cloudwatch_agent" {
  source = "../../util/awscli"

  # --max-items 1 limits the listing to a single item.
  aws_cli_commands = "cloudwatch list-metrics --namespace CWAgent --dimensions Name=InstanceId,Value=${var.ec2-instance-id} --query Metrics[].MetricName --max-items 1"

  # Credentials passed through to the utility module.
  access_key    = var.target-account-ak
  secret_key    = var.target-account-sk
  session_token = var.target-account-token
}
resource "aws_cloudwatch_metric_alarm" "ec2-mem_used_percent" { resource "aws_cloudwatch_metric_alarm" "ec2-mem_used_percent" {
# count = data.external.ec2-os.result.os == "Linux" ? 1 : 0 count = module.ec2_os.awscliout[0] != "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0
count = flatten(module.ec2_os.result)[0] == "Windows" ? 0 : 1
alarm_name = "${var.settings.mem_used_percent.ecccode}-EC2_${var.ec2-instance-id}-mem_used_percent" alarm_name = "${var.settings.mem_used_percent.ecccode}-EC2_${var.ec2-instance-id}-mem_used_percent"
comparison_operator = var.settings.mem_used_percent.comparison_operator comparison_operator = var.settings.mem_used_percent.comparison_operator
evaluation_periods = var.settings.mem_used_percent.evaluation_periods evaluation_periods = var.settings.mem_used_percent.evaluation_periods
@ -107,20 +101,30 @@ resource "aws_cloudwatch_metric_alarm" "ec2-mem_used_percent" {
ImageId = data.aws_instance.ec2-instance.ami ImageId = data.aws_instance.ec2-instance.ami
InstanceType = data.aws_instance.ec2-instance.instance_type InstanceType = data.aws_instance.ec2-instance.instance_type
} }
tags = var.default-tags
} }
data "external" "cw-dimensions" { data "external" "cw-dimensions" {
program = ["bash", "${path.module}/get-cwagent-dimensions.sh"] program = ["bash", "${path.module}/get-cwagent-dimensions.sh"]
query = { query = {
input = var.ec2-instance-id iid = var.ec2-instance-id
asrolearn = var.asrolearn access_key = var.target-account-ak
secret_key = var.target-account-sk
session_token = var.target-account-token
} }
} }
/* module returns blank
module "cw-dimensions" {
source = "../../util/awscli"
access_key = var.target-account-ak
aws_cli_commands = "cloudwatch list-metrics --namespace CWAgent --metric-name disk_inodes_free --dimensions Name=InstanceId,Value=${var.ec2-instance-id} Name=path,Value=/ --query Metrics[].Dimensions[] | jq '.[] | {(.Name):(.Value)}' | jq -s 'add'"
secret_key = var.target-account-sk
session_token = var.target-account-token
}
*/
resource "aws_cloudwatch_metric_alarm" "ec2-swap_used_percent" { resource "aws_cloudwatch_metric_alarm" "ec2-swap_used_percent" {
# count = data.external.ec2-os.result.os == "Linux" ? 1 : 0 count = module.ec2_os.awscliout[0] != "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0
count = flatten(module.ec2_os.result)[0] == "Windows" ? 0 : 1
alarm_name = "${var.settings.swap_used_percent.ecccode}-EC2_${var.ec2-instance-id}-swap_used_percent" alarm_name = "${var.settings.swap_used_percent.ecccode}-EC2_${var.ec2-instance-id}-swap_used_percent"
comparison_operator = var.settings.swap_used_percent.comparison_operator comparison_operator = var.settings.swap_used_percent.comparison_operator
evaluation_periods = var.settings.swap_used_percent.evaluation_periods evaluation_periods = var.settings.swap_used_percent.evaluation_periods
@ -139,12 +143,10 @@ resource "aws_cloudwatch_metric_alarm" "ec2-swap_used_percent" {
ImageId = data.aws_instance.ec2-instance.ami ImageId = data.aws_instance.ec2-instance.ami
InstanceType = data.aws_instance.ec2-instance.instance_type InstanceType = data.aws_instance.ec2-instance.instance_type
} }
tags = var.default-tags
} }
resource "aws_cloudwatch_metric_alarm" "ec2-disk_used_percent" { resource "aws_cloudwatch_metric_alarm" "ec2-disk_used_percent" {
# count = data.external.ec2-os.result.os == "Linux" && data.external.cw-dimensions.result != null ? 1 : 0 count = module.ec2_os.awscliout[0] != "Windows" && data.external.cw-dimensions.result != null ? 1 : 0
count = flatten(module.ec2_os.result)[0] == "Windows" && data.external.cw-dimensions.result != null ? 0 : 1
alarm_name = "${var.settings.disk_used_percent.ecccode}-EC2_${var.ec2-instance-id}-disk_used_percent" alarm_name = "${var.settings.disk_used_percent.ecccode}-EC2_${var.ec2-instance-id}-disk_used_percent"
comparison_operator = var.settings.disk_used_percent.comparison_operator comparison_operator = var.settings.disk_used_percent.comparison_operator
evaluation_periods = var.settings.disk_used_percent.evaluation_periods evaluation_periods = var.settings.disk_used_percent.evaluation_periods
@ -159,14 +161,11 @@ resource "aws_cloudwatch_metric_alarm" "ec2-disk_used_percent" {
alarm_actions = [var.settings.disk_used_percent.action] alarm_actions = [var.settings.disk_used_percent.action]
ok_actions = [var.settings.disk_used_percent.action] ok_actions = [var.settings.disk_used_percent.action]
dimensions = data.external.cw-dimensions.result dimensions = data.external.cw-dimensions.result
tags = var.default-tags
} }
resource "aws_cloudwatch_metric_alarm" "ec2-disk_inodes_free" { resource "aws_cloudwatch_metric_alarm" "ec2-disk_inodes_free" {
# count = data.external.ec2-os.result.os == "Linux" && data.external.cw-dimensions.result != null ? 1 : 0 count = module.ec2_os.awscliout[0] != "Windows" && data.external.cw-dimensions.result != null ? 1 : 0
count = flatten(module.ec2_os.result)[0] == "Windows" && data.external.cw-dimensions.result != null ? 0 : 1
alarm_name = "${var.settings.disk_inodes_free.ecccode}-EC2_${var.ec2-instance-id}-disk_inodes_free" alarm_name = "${var.settings.disk_inodes_free.ecccode}-EC2_${var.ec2-instance-id}-disk_inodes_free"
comparison_operator = var.settings.disk_inodes_free.comparison_operator comparison_operator = var.settings.disk_inodes_free.comparison_operator
evaluation_periods = var.settings.disk_inodes_free.evaluation_periods evaluation_periods = var.settings.disk_inodes_free.evaluation_periods
@ -181,23 +180,11 @@ resource "aws_cloudwatch_metric_alarm" "ec2-disk_inodes_free" {
alarm_actions = [var.settings.disk_inodes_free.action] alarm_actions = [var.settings.disk_inodes_free.action]
ok_actions = [var.settings.disk_inodes_free.action] ok_actions = [var.settings.disk_inodes_free.action]
dimensions = data.external.cw-dimensions.result dimensions = data.external.cw-dimensions.result
/*
dimensions = {
InstanceId = var.ec2-instance-id
ImageId = data.aws_instance.ec2-instance.ami
InstanceType = data.aws_instance.ec2-instance.instance_type
device = data.external.disk-device.result.device
fstype = data.external.disk-device.result.fstype
path = "/"
}
*/
tags = var.default-tags
} }
# process metric not published by default cw agent config # process metric not published by default cw agent config
resource "aws_cloudwatch_metric_alarm" "ec2-processes_total" { resource "aws_cloudwatch_metric_alarm" "ec2-processes_total" {
# count = data.external.ec2-os.result.os == "Linux" ? 1 : 0 count = module.ec2_os.awscliout[0] != "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0
count = flatten(module.ec2_os.result)[0] == "Windows" ? 0 : 1
alarm_name = "${var.settings.processes_total.ecccode}-EC2_${var.ec2-instance-id}-processes_total" alarm_name = "${var.settings.processes_total.ecccode}-EC2_${var.ec2-instance-id}-processes_total"
comparison_operator = var.settings.processes_total.comparison_operator comparison_operator = var.settings.processes_total.comparison_operator
evaluation_periods = var.settings.processes_total.evaluation_periods evaluation_periods = var.settings.processes_total.evaluation_periods
@ -216,14 +203,57 @@ resource "aws_cloudwatch_metric_alarm" "ec2-processes_total" {
ImageId = data.aws_instance.ec2-instance.ami ImageId = data.aws_instance.ec2-instance.ami
InstanceType = data.aws_instance.ec2-instance.instance_type InstanceType = data.aws_instance.ec2-instance.instance_type
} }
tags = var.default-tags
} }
# Alarm on the CWAgent "net_err_in" metric for this instance.
# Created only when the first element of module.ec2_os.awscliout is not
# "Windows" and module.detect_cloudwatch_agent.awscliout is non-empty.
# NOTE(review): the OS test is an exact string match; confirm that platform
# strings such as "Windows with SQL Server ..." cannot reach this comparison.
resource "aws_cloudwatch_metric_alarm" "ec2-net_err_in" {
  count = module.ec2_os.awscliout[0] != "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0

  # Identity
  alarm_name        = "${var.settings.net_err_in.ecccode}-EC2_${var.ec2-instance-id}-net_err_in"
  alarm_description = "EC2:net_err_in"

  # Metric being watched
  namespace   = "CWAgent"
  metric_name = "net_err_in"
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
    # NOTE(review): interface name is hard-coded; verify it matches the
    # interface dimension the agent actually publishes on these instances.
    interface = "eth0"
  }

  # Evaluation parameters, all taken from var.settings.net_err_in
  statistic           = var.settings.net_err_in.statistic
  period              = var.settings.net_err_in.period
  evaluation_periods  = var.settings.net_err_in.evaluation_periods
  comparison_operator = var.settings.net_err_in.comparison_operator
  threshold           = var.settings.net_err_in.threshold

  # Notifications: the same action target is used for ALARM and OK
  # transitions; no action is taken on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.net_err_in.action]
  ok_actions                = [var.settings.net_err_in.action]
  insufficient_data_actions = []
}
# Alarm on the CWAgent "net_err_out" metric for this instance.
# Created only when the first element of module.ec2_os.awscliout is not
# "Windows" and module.detect_cloudwatch_agent.awscliout is non-empty.
resource "aws_cloudwatch_metric_alarm" "ec2-net_err_out" {
  count = module.ec2_os.awscliout[0] != "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0

  alarm_name          = "${var.settings.net_err_out.ecccode}-EC2_${var.ec2-instance-id}-net_err_out"
  comparison_operator = var.settings.net_err_out.comparison_operator
  evaluation_periods  = var.settings.net_err_out.evaluation_periods
  # Must be the outbound metric so it agrees with the alarm name and
  # description; watching "net_err_in" here would duplicate the inbound alarm.
  metric_name       = "net_err_out"
  period            = var.settings.net_err_out.period
  statistic         = var.settings.net_err_out.statistic
  threshold         = var.settings.net_err_out.threshold
  alarm_description = "EC2:net_err_out"
  namespace         = "CWAgent"

  # The same action target is used for ALARM and OK transitions; no action is
  # taken on INSUFFICIENT_DATA.
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.net_err_out.action]
  ok_actions                = [var.settings.net_err_out.action]

  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
    # NOTE(review): interface name is hard-coded; verify it matches the
    # interface dimension the agent actually publishes on these instances.
    interface = "eth0"
  }
}
# Windows specific checks # Windows specific checks
resource "aws_cloudwatch_metric_alarm" "ec2-MemoryCommittedPct" { resource "aws_cloudwatch_metric_alarm" "ec2-MemoryCommittedPct" {
# count = data.external.ec2-os.result.os == "Windows" ? 1 : 0 count = module.ec2_os.awscliout[0] == "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0
count = flatten(module.ec2_os.result)[0] == "Windows" ? 1 : 0
alarm_name = "${var.settings.MemoryCommittedPct.ecccode}-EC2_${var.ec2-instance-id}-MemoryCommittedPct" alarm_name = "${var.settings.MemoryCommittedPct.ecccode}-EC2_${var.ec2-instance-id}-MemoryCommittedPct"
comparison_operator = var.settings.MemoryCommittedPct.comparison_operator comparison_operator = var.settings.MemoryCommittedPct.comparison_operator
evaluation_periods = var.settings.MemoryCommittedPct.evaluation_periods evaluation_periods = var.settings.MemoryCommittedPct.evaluation_periods
@ -243,12 +273,10 @@ resource "aws_cloudwatch_metric_alarm" "ec2-MemoryCommittedPct" {
ImageId = data.aws_instance.ec2-instance.ami ImageId = data.aws_instance.ec2-instance.ami
InstanceType = data.aws_instance.ec2-instance.instance_type InstanceType = data.aws_instance.ec2-instance.instance_type
} }
tags = var.default-tags
} }
resource "aws_cloudwatch_metric_alarm" "ec2-LogicalDiskFreePct" { resource "aws_cloudwatch_metric_alarm" "ec2-LogicalDiskFreePct" {
# count = data.external.ec2-os.result.os == "Windows" ? 1 : 0 count = module.ec2_os.awscliout[0] == "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0
count = flatten(module.ec2_os.result)[0] == "Windows" ? 1 : 0
alarm_name = "${var.settings.LogicalDiskFreePct.ecccode}-EC2_${var.ec2-instance-id}-LogicalDiskFreePct" alarm_name = "${var.settings.LogicalDiskFreePct.ecccode}-EC2_${var.ec2-instance-id}-LogicalDiskFreePct"
comparison_operator = var.settings.LogicalDiskFreePct.comparison_operator comparison_operator = var.settings.LogicalDiskFreePct.comparison_operator
evaluation_periods = var.settings.LogicalDiskFreePct.evaluation_periods evaluation_periods = var.settings.LogicalDiskFreePct.evaluation_periods
@ -269,5 +297,4 @@ resource "aws_cloudwatch_metric_alarm" "ec2-LogicalDiskFreePct" {
ImageId = data.aws_instance.ec2-instance.ami ImageId = data.aws_instance.ec2-instance.ami
InstanceType = data.aws_instance.ec2-instance.instance_type InstanceType = data.aws_instance.ec2-instance.instance_type
} }
tags = var.default-tags
} }

View File

@ -2,5 +2,7 @@ variable "cw-alarm-prefix" {}
variable "actions-enabled" {} variable "actions-enabled" {}
variable "ec2-instance-id" {} variable "ec2-instance-id" {}
variable "settings" {} variable "settings" {}
variable "asrolearn" {} # variable asrolearn {}
variable "default-tags" {} variable target-account-ak {}
variable target-account-sk {}
variable target-account-token {}

View File

@ -20,7 +20,6 @@ resource "aws_cloudwatch_metric_alarm" "eks-pod_cpu_utilization" {
"ClusterName" = var.cluster-name "ClusterName" = var.cluster-name
"Namespace" = var.eks-namespace "Namespace" = var.eks-namespace
} }
tags = var.default-tags
} }
resource "aws_cloudwatch_metric_alarm" "eks-pod_memory_utilization" { resource "aws_cloudwatch_metric_alarm" "eks-pod_memory_utilization" {
@ -44,7 +43,6 @@ resource "aws_cloudwatch_metric_alarm" "eks-pod_memory_utilization" {
"ClusterName" = var.cluster-name "ClusterName" = var.cluster-name
"Namespace" = var.eks-namespace "Namespace" = var.eks-namespace
} }
tags = var.default-tags
} }
resource "aws_cloudwatch_metric_alarm" "eks-pod_number_of_container_restarts" { resource "aws_cloudwatch_metric_alarm" "eks-pod_number_of_container_restarts" {
@ -68,5 +66,4 @@ resource "aws_cloudwatch_metric_alarm" "eks-pod_number_of_container_restarts" {
"ClusterName" = var.cluster-name "ClusterName" = var.cluster-name
"Namespace" = var.eks-namespace "Namespace" = var.eks-namespace
} }
tags = var.default-tags
} }

View File

@ -1,7 +1,5 @@
variable cw-alarm-prefix {} variable cw-alarm-prefix {}
variable actions-enabled {} variable actions-enabled {}
variable default-tags {}
variable cluster-name {} variable cluster-name {}
variable eks-namespace {} variable eks-namespace {}
variable pod-names { variable pod-names {

View File

@ -16,5 +16,4 @@ resource "aws_cloudwatch_metric_alarm" "emr-alarms" {
dimensions = { dimensions = {
JobFlowId = var.job-flow-id JobFlowId = var.job-flow-id
} }
tags = var.default-tags
} }

View File

@ -1,5 +1,4 @@
variable cw-alarm-prefix {} variable cw-alarm-prefix {}
variable actions-enabled {} variable actions-enabled {}
variable job-flow-id {} variable job-flow-id {}
variable settings {} variable settings {}
variable default-tags {}

View File

@ -1,5 +1,3 @@
variable cw-alarm-prefix {} variable cw-alarm-prefix {}
variable actions-enabled {} variable actions-enabled {}
variable settings {} variable settings {}
variable default-tags {}

View File

@ -15,7 +15,6 @@ resource "aws_cloudwatch_metric_alarm" "Kafka-ZooKeeperRequestLatencyMsMean" {
dimensions = { dimensions = {
"Cluster Name" = var.cluster-name "Cluster Name" = var.cluster-name
} }
tags = var.default-tags
} }
data "aws_msk_cluster" "msk-cluster" { data "aws_msk_cluster" "msk-cluster" {
@ -71,8 +70,6 @@ resource "aws_cloudwatch_metric_alarm" "Kafka-CpuUserSystem" {
label = "CpuUserSystem" label = "CpuUserSystem"
return_data = "true" return_data = "true"
} }
tags = var.default-tags
} }
resource "aws_cloudwatch_metric_alarm" "Kafka-KafkaDataLogsDiskUsed" { resource "aws_cloudwatch_metric_alarm" "Kafka-KafkaDataLogsDiskUsed" {
@ -94,7 +91,6 @@ resource "aws_cloudwatch_metric_alarm" "Kafka-KafkaDataLogsDiskUsed" {
"Cluster Name" = var.cluster-name "Cluster Name" = var.cluster-name
"Broker ID" = each.value "Broker ID" = each.value
} }
tags = var.default-tags
} }
resource "aws_cloudwatch_metric_alarm" "Kafka-HeapMemoryAfterGC" { resource "aws_cloudwatch_metric_alarm" "Kafka-HeapMemoryAfterGC" {
@ -116,6 +112,5 @@ resource "aws_cloudwatch_metric_alarm" "Kafka-HeapMemoryAfterGC" {
"Cluster Name" = var.cluster-name "Cluster Name" = var.cluster-name
"Broker ID" = each.value "Broker ID" = each.value
} }
tags = var.default-tags
} }

View File

@ -2,5 +2,3 @@ variable cw-alarm-prefix {}
variable actions-enabled {} variable actions-enabled {}
variable cluster-name {} variable cluster-name {}
variable settings {} variable settings {}
variable default-tags {}

View File

@ -16,5 +16,4 @@ resource "aws_cloudwatch_metric_alarm" "ngw-alarms" {
dimensions = { dimensions = {
NatGatewayId = var.res-id NatGatewayId = var.res-id
} }
tags = var.default-tags
} }

View File

@ -1,5 +1,4 @@
variable cw-alarm-prefix {} variable cw-alarm-prefix {}
variable actions-enabled {} variable actions-enabled {}
variable res-id {} variable res-id {}
variable default-tags {}
variable settings {} variable settings {}

View File

@ -27,7 +27,6 @@ resource "aws_cloudwatch_metric_alarm" "nlb-TCP_Target_Reset_Count" {
dimensions = { dimensions = {
LoadBalancer = local.nlb-name LoadBalancer = local.nlb-name
} }
tags = var.default-tags
} }
/* /*
@ -41,6 +40,7 @@ module "nlb-targetgroups" {
// causes Rate exceeded error, maybe because of adaptive AWS_RETRY_MODE? // causes Rate exceeded error, maybe because of adaptive AWS_RETRY_MODE?
/*
module "nlb_tgs" { module "nlb_tgs" {
assume_role_arn = var.asrolearn assume_role_arn = var.asrolearn
role_session_name = "terraform-resource-list" role_session_name = "terraform-resource-list"
@ -48,10 +48,20 @@ module "nlb_tgs" {
aws_cli_commands = ["elbv2", "describe-target-groups", "--load-balancer-arn", var.load-balancer] aws_cli_commands = ["elbv2", "describe-target-groups", "--load-balancer-arn", var.load-balancer]
aws_cli_query = "TargetGroups[*].TargetGroupArn" aws_cli_query = "TargetGroups[*].TargetGroupArn"
} }
*/
# Lists the ARNs of the target groups attached to var.load-balancer by running
# an `elbv2 describe-target-groups` AWS CLI command through the shared awscli
# utility module, using the supplied target-account credentials.
module "nlb_tgs" {
  source = "../../util/awscli"

  # CLI invocation; the --query expression keeps only each TargetGroupArn.
  aws_cli_commands = "elbv2 describe-target-groups --load-balancer-arn ${var.load-balancer} --query TargetGroups[*].TargetGroupArn"

  # Credentials passed through to the utility module.
  access_key    = var.target-account-ak
  secret_key    = var.target-account-sk
  session_token = var.target-account-token
}
resource "aws_cloudwatch_metric_alarm" "nlb-HealthyHostCount" { resource "aws_cloudwatch_metric_alarm" "nlb-HealthyHostCount" {
# for_each = module.nlb-targetgroups.result-set # for_each = module.nlb-targetgroups.result-set
for_each = toset(flatten(module.nlb_tgs.result)) for_each = toset(module.nlb_tgs.awscliout)
alarm_name = "${var.settings.HealthHostCountMin.ecccode}-NLBTG_${split(":", each.value)[5]}-HealthyHostCount" alarm_name = "${var.settings.HealthHostCountMin.ecccode}-NLBTG_${split(":", each.value)[5]}-HealthyHostCount"
comparison_operator = var.settings.HealthHostCountMin.comparison_operator comparison_operator = var.settings.HealthHostCountMin.comparison_operator
evaluation_periods = var.settings.HealthHostCountMin.evaluation_periods evaluation_periods = var.settings.HealthHostCountMin.evaluation_periods
@ -69,5 +79,4 @@ resource "aws_cloudwatch_metric_alarm" "nlb-HealthyHostCount" {
TargetGroup = split(":", each.value)[5] TargetGroup = split(":", each.value)[5]
LoadBalancer = "net/${split("/", var.load-balancer)[2]}/${split("/", var.load-balancer)[3]}" LoadBalancer = "net/${split("/", var.load-balancer)[2]}/${split("/", var.load-balancer)[3]}"
} }
tags = var.default-tags
} }

View File

@ -1,4 +1,4 @@
output nlb-tg-count { output nlb-tg-count {
# value = length(module.nlb-targetgroups.result-set) # value = length(module.nlb-targetgroups.result-set)
value = length(flatten(module.nlb_tgs.result)) value = length(flatten(module.nlb_tgs.awscliout))
} }

View File

@ -2,5 +2,7 @@ variable cw-alarm-prefix {}
variable actions-enabled {} variable actions-enabled {}
variable load-balancer {} variable load-balancer {}
variable settings {} variable settings {}
variable default-tags {} # variable asrolearn {}
variable asrolearn {} variable target-account-ak {}
variable target-account-sk {}
variable target-account-token {}

View File

@ -19,5 +19,4 @@ resource "aws_cloudwatch_metric_alarm" "ES-alarms" {
DomainName = var.domain-name DomainName = var.domain-name
ClientId = data.aws_caller_identity.this.id ClientId = data.aws_caller_identity.this.id
} }
tags = var.default-tags
} }

View File

@ -2,5 +2,3 @@ variable "cw-alarm-prefix" {}
variable "actions-enabled" {} variable "actions-enabled" {}
variable "domain-name" {} variable "domain-name" {}
variable "settings" {} variable "settings" {}
variable "default-tags" {}

View File

@ -16,5 +16,4 @@ resource "aws_cloudwatch_metric_alarm" "rds-alarms" {
dimensions = { dimensions = {
DBInstanceIdentifier = var.rds-instance-name DBInstanceIdentifier = var.rds-instance-name
} }
tags = var.default-tags
} }

View File

@ -1,5 +1,4 @@
variable cw-alarm-prefix {} variable cw-alarm-prefix {}
variable actions-enabled {} variable actions-enabled {}
variable rds-instance-name {} variable rds-instance-name {}
variable settings {} variable settings {}
variable default-tags {}

View File

@ -16,5 +16,4 @@ resource "aws_cloudwatch_metric_alarm" "redis-alarms" {
dimensions = { dimensions = {
CacheClusterId = var.redis-cluster-id CacheClusterId = var.redis-cluster-id
} }
tags = var.default-tags
} }

View File

@ -1,5 +1,4 @@
variable "cw-alarm-prefix" {} variable "cw-alarm-prefix" {}
variable "actions-enabled" {} variable "actions-enabled" {}
variable "redis-cluster-id" {} variable "redis-cluster-id" {}
variable "settings" {} variable "settings" {}
variable "default-tags" {}

View File

@ -16,5 +16,4 @@ resource "aws_cloudwatch_metric_alarm" "tgw-PacketDropCountNoRoute" {
dimensions = { dimensions = {
TransitGateway = var.tgw-id TransitGateway = var.tgw-id
} }
tags = var.default-tags
} }

View File

@ -1,5 +1,4 @@
variable cw-alarm-prefix {} variable cw-alarm-prefix {}
variable actions-enabled {} variable actions-enabled {}
variable tgw-id {} variable tgw-id {}
variable settings {} variable settings {}
variable default-tags {}