380 lines
14 KiB
HCL
380 lines
14 KiB
HCL
# EC2 system status check alarm.
# Enters ALARM when the Maximum of StatusCheckFailed_System over a single
# 300-second period is greater than 0.
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_System" {
  # --- identity ---
  alarm_name        = "${var.cw-alarm-prefix}:EC2:StatusCheckFailed_System:${var.ec2-instance-id}"
  alarm_description = "EC2:StatusCheckFailed_System"

  # --- metric being watched ---
  namespace   = "AWS/EC2"
  metric_name = "StatusCheckFailed_System"
  dimensions = {
    InstanceId = var.ec2-instance-id
  }

  # --- evaluation rule ---
  statistic           = "Maximum"
  period              = "300"
  evaluation_periods  = "1"
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0

  # --- notifications ---
  # The "emergency" target receives both ALARM and OK transitions;
  # nothing is triggered on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-emergency]
  ok_actions                = [var.sns-targets.alarm-actions-emergency]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored instead of producing a diff.
    ignore_changes = [tags]
  }
}
|
|
|
|
# EC2 instance status check alarm.
# Enters ALARM when the Maximum of StatusCheckFailed_Instance over a single
# 300-second period is greater than 0.
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_Instance" {
  # --- identity ---
  alarm_name        = "${var.cw-alarm-prefix}:EC2:StatusCheckFailed_Instance:${var.ec2-instance-id}"
  alarm_description = "EC2:StatusCheckFailed_Instance"

  # --- metric being watched ---
  namespace   = "AWS/EC2"
  metric_name = "StatusCheckFailed_Instance"
  dimensions = {
    InstanceId = var.ec2-instance-id
  }

  # --- evaluation rule ---
  statistic           = "Maximum"
  period              = "300"
  evaluation_periods  = "1"
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0

  # --- notifications ---
  # The "emergency" target receives both ALARM and OK transitions;
  # nothing is triggered on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-emergency]
  ok_actions                = [var.sns-targets.alarm-actions-emergency]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored instead of producing a diff.
    ignore_changes = [tags]
  }
}
|
|
|
|
# EC2 CPU utilization alarm.
# Enters ALARM when the Average CPUUtilization is greater than
# var.threshold-CPUUtilization for 6 consecutive 300-second periods.
resource "aws_cloudwatch_metric_alarm" "ec2-CPUUtilization" {
  # --- identity ---
  alarm_name        = "${var.cw-alarm-prefix}:EC2:CPUUtilization:${var.ec2-instance-id}"
  alarm_description = "EC2:CPUUtilization"

  # --- metric being watched ---
  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"
  dimensions = {
    InstanceId = var.ec2-instance-id
  }

  # --- evaluation rule ---
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "6"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-CPUUtilization
  # Periods without datapoints are treated as within the threshold.
  treat_missing_data = "notBreaching"

  # --- notifications ---
  # The "urgent" target receives both ALARM and OK transitions;
  # nothing is triggered on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored instead of producing a diff.
    ignore_changes = [tags]
  }
}
|
|
|
|
# cwagent metrics
|
|
# Looks up the monitored instance by ID. Its `ami` and `instance_type`
# attributes are used as the ImageId / InstanceType dimensions of the
# CWAgent alarms in this file.
data "aws_instance" "ec2-instance" {
  instance_id = var.ec2-instance-id
}
|
|
|
|
# get instance OS
|
|
# Runs the module-local get-os-platform.sh helper through bash, passing the
# instance ID as `input`. The alarms in this file read the `os` key of the
# result and compare it against "Linux" / "Windows".
data "external" "ec2-os" {
  query = {
    input = var.ec2-instance-id
  }

  program = [
    "bash",
    "${path.module}/get-os-platform.sh",
  ]
}
|
|
|
|
# Linux specific checks
|
|
# The default CloudWatch agent configuration publishes mem_used_percent, so the mem_free alarm below is left commented out.
|
|
/*
|
|
resource "aws_cloudwatch_metric_alarm" "ec2-mem_free" {
|
|
count = data.external.ec2-os.result.os == "Linux" ? 1 : 0
|
|
alarm_name = "${var.cw-alarm-prefix}:EC2:mem_free:${var.ec2-instance-id}"
|
|
comparison_operator = "LessThanThreshold"
|
|
evaluation_periods = "2"
|
|
metric_name = "mem_free"
|
|
period = "900"
|
|
statistic = "Average"
|
|
threshold = var.threshold-mem_free
|
|
alarm_description = "EC2:mem_free"
|
|
namespace = "CWAgent"
|
|
insufficient_data_actions = []
|
|
actions_enabled = var.actions-enabled
|
|
alarm_actions = [var.sns-targets.alarm-actions-standard]
|
|
ok_actions = [var.sns-targets.alarm-actions-standard]
|
|
dimensions = {
|
|
InstanceId = var.ec2-instance-id
|
|
ImageId = data.aws_instance.ec2-instance.ami
|
|
InstanceType = data.aws_instance.ec2-instance.instance_type
|
|
}
|
|
tags = var.default-tags
|
|
lifecycle {
|
|
ignore_changes = [tags]
|
|
}
|
|
}
|
|
*/
|
|
|
|
# Linux memory usage alarm (CWAgent namespace).
# Enters ALARM when the Average mem_used_percent is greater than
# var.threshold-mem_used_percent for 2 consecutive 900-second periods.
resource "aws_cloudwatch_metric_alarm" "ec2-mem_used_percent" {
  # Created only when the external OS lookup reports "Linux".
  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0

  # --- identity ---
  alarm_name        = "${var.cw-alarm-prefix}:EC2:mem_used_percent:${var.ec2-instance-id}"
  alarm_description = "EC2:mem_used_percent"

  # --- metric being watched ---
  namespace   = "CWAgent"
  metric_name = "mem_used_percent"
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }

  # --- evaluation rule ---
  statistic           = "Average"
  period              = "900"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-mem_used_percent

  # --- notifications ---
  # The "standard" target receives both ALARM and OK transitions;
  # nothing is triggered on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-standard]
  ok_actions                = [var.sns-targets.alarm-actions-standard]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored instead of producing a diff.
    ignore_changes = [tags]
  }
}
|
|
|
|
# The default CloudWatch agent configuration publishes swap_used_percent, so the swap_free alarm below is left commented out.
|
|
/*
|
|
resource "aws_cloudwatch_metric_alarm" "ec2-swap_free" {
|
|
count = data.external.ec2-os.result.os == "Linux" ? 1 : 0
|
|
# zero is fine as most ec2 instances are deployed without any swap
|
|
alarm_name = "${var.cw-alarm-prefix}:EC2:swap_free:${var.ec2-instance-id}"
|
|
comparison_operator = "LessThanThreshold"
|
|
evaluation_periods = "2"
|
|
threshold = var.threshold-swap_free
|
|
alarm_description = "EC2:swap_free"
|
|
insufficient_data_actions = []
|
|
actions_enabled = var.actions-enabled
|
|
alarm_actions = [var.sns-targets.alarm-actions-standard]
|
|
ok_actions = [var.sns-targets.alarm-actions-standard]
|
|
metric_query {
|
|
id = "m1"
|
|
metric {
|
|
metric_name = "swap_free"
|
|
namespace = "CWAgent"
|
|
period = 900
|
|
stat = "Average"
|
|
dimensions = {
|
|
InstanceId = var.ec2-instance-id
|
|
ImageId = data.aws_instance.ec2-instance.ami
|
|
InstanceType = data.aws_instance.ec2-instance.instance_type
|
|
}
|
|
}
|
|
}
|
|
metric_query {
|
|
id = "e1"
|
|
expression = "IF(m1==0, ${var.threshold-swap_free}, m1)"
|
|
label = "swap_free_if_not_zero"
|
|
return_data = "true"
|
|
}
|
|
|
|
tags = var.default-tags
|
|
lifecycle {
|
|
ignore_changes = [tags]
|
|
}
|
|
}
|
|
*/
|
|
|
|
# Runs the module-local get-cwagent-dimensions.sh helper through bash, passing
# the instance ID as `input`. The whole result map is used as the `dimensions`
# of the disk alarms, which are skipped when the map is empty.
data "external" "cw-dimensions" {
  query = {
    input = var.ec2-instance-id
  }

  program = [
    "bash",
    "${path.module}/get-cwagent-dimensions.sh",
  ]
}
|
|
|
|
|
|
# Linux swap usage alarm (CWAgent namespace).
# Enters ALARM when the Average swap_used_percent is greater than
# var.threshold-swap_used_percent for 2 consecutive 900-second periods.
resource "aws_cloudwatch_metric_alarm" "ec2-swap_used_percent" {
  # Created only when the external OS lookup reports "Linux".
  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0

  # --- identity ---
  alarm_name        = "${var.cw-alarm-prefix}:EC2:swap_used_percent:${var.ec2-instance-id}"
  alarm_description = "EC2:swap_used_percent"

  # --- metric being watched ---
  namespace   = "CWAgent"
  metric_name = "swap_used_percent"
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }

  # --- evaluation rule ---
  statistic           = "Average"
  period              = "900"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-swap_used_percent

  # --- notifications ---
  # The "urgent" target receives both ALARM and OK transitions;
  # nothing is triggered on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored instead of producing a diff.
    ignore_changes = [tags]
  }
}
|
|
|
|
# The default CloudWatch agent configuration publishes disk_used_percent, so the disk_free alarm below is left commented out.
|
|
/*
|
|
resource "aws_cloudwatch_metric_alarm" "ec2-disk_free" {
|
|
count = data.external.ec2-os.result.os == "Linux" && length(data.external.cw-dimensions.result) > 0 ? 1 : 0
|
|
alarm_name = "${var.cw-alarm-prefix}:EC2:disk_free:${var.ec2-instance-id}"
|
|
comparison_operator = "LessThanThreshold"
|
|
evaluation_periods = "2"
|
|
metric_name = "disk_free"
|
|
period = "900"
|
|
statistic = "Average"
|
|
threshold = var.threshold-disk_free
|
|
alarm_description = "EC2:disk_free"
|
|
namespace = "CWAgent"
|
|
insufficient_data_actions = []
|
|
actions_enabled = var.actions-enabled
|
|
alarm_actions = [var.sns-targets.alarm-actions-urgent]
|
|
ok_actions = [var.sns-targets.alarm-actions-urgent]
|
|
dimensions = data.external.cw-dimensions.result
|
|
|
|
tags = var.default-tags
|
|
lifecycle {
|
|
ignore_changes = [tags]
|
|
}
|
|
}
|
|
*/
|
|
|
|
# Linux disk usage alarm (CWAgent namespace).
# Enters ALARM when the Average disk_used_percent is greater than the
# configured threshold for 2 consecutive 900-second periods.
resource "aws_cloudwatch_metric_alarm" "ec2-disk_used_percent" {
  # Created only for Linux instances for which the dimension lookup script
  # returned a non-empty map.
  count = data.external.ec2-os.result.os == "Linux" && length(data.external.cw-dimensions.result) > 0 ? 1 : 0

  # --- identity ---
  alarm_name        = "${var.cw-alarm-prefix}:EC2:disk_used_percent:${var.ec2-instance-id}"
  alarm_description = "EC2:disk_used_percent"

  # --- metric being watched ---
  namespace   = "CWAgent"
  metric_name = "disk_used_percent"
  # Dimension set is taken verbatim from the external lookup.
  dimensions = data.external.cw-dimensions.result

  # --- evaluation rule ---
  statistic           = "Average"
  period              = "900"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  # NOTE(review): the variable is spelled "..._percentage" while the metric is
  # "..._percent" -- confirm this matches the variable declaration.
  threshold = var.threshold-disk_used_percentage

  # --- notifications ---
  # The "urgent" target receives both ALARM and OK transitions;
  # nothing is triggered on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored instead of producing a diff.
    ignore_changes = [tags]
  }
}
|
|
|
|
|
|
# Linux free-inode alarm (CWAgent namespace).
# Enters ALARM when the Average disk_inodes_free is less than
# var.threshold-disk_inodes_free for 2 consecutive 300-second periods.
resource "aws_cloudwatch_metric_alarm" "ec2-disk_inodes_free" {
  # Created only for Linux instances for which the dimension lookup script
  # returned a non-empty map.
  count = data.external.ec2-os.result.os == "Linux" && length(data.external.cw-dimensions.result) > 0 ? 1 : 0

  # --- identity ---
  alarm_name        = "${var.cw-alarm-prefix}:EC2:disk_inodes_free:${var.ec2-instance-id}"
  alarm_description = "EC2:disk_inodes_free"

  # --- metric being watched ---
  namespace   = "CWAgent"
  metric_name = "disk_inodes_free"
  # Dimension set is taken verbatim from the external lookup.
  dimensions = data.external.cw-dimensions.result

  # Earlier hand-built dimension map, kept for reference only. It refers to
  # data.external.disk-device, which is not declared in the visible part of
  # this file.
  /*
  dimensions = {
    InstanceId = var.ec2-instance-id
    ImageId = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
    device = data.external.disk-device.result.device
    fstype = data.external.disk-device.result.fstype
    path = "/"
  }
  */

  # --- evaluation rule ---
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-disk_inodes_free

  # --- notifications ---
  # The "urgent" target receives both ALARM and OK transitions;
  # nothing is triggered on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored instead of producing a diff.
    ignore_changes = [tags]
  }
}
|
|
|
|
# NOTE: the processes_total metric is not published by the default CloudWatch agent configuration.
|
|
# Linux process count alarm (CWAgent namespace).
# Enters ALARM when the Average processes_total is greater than
# var.threshold-processes_total for 2 consecutive 900-second periods.
resource "aws_cloudwatch_metric_alarm" "ec2-processes_total" {
  # Created only when the external OS lookup reports "Linux".
  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0

  # --- identity ---
  alarm_name        = "${var.cw-alarm-prefix}:EC2:processes_total:${var.ec2-instance-id}"
  alarm_description = "EC2:processes_total"

  # --- metric being watched ---
  namespace   = "CWAgent"
  metric_name = "processes_total"
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }

  # --- evaluation rule ---
  statistic           = "Average"
  period              = "900"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-processes_total

  # --- notifications ---
  # The "urgent" target receives both ALARM and OK transitions;
  # nothing is triggered on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored instead of producing a diff.
    ignore_changes = [tags]
  }
}
|
|
|
|
|
|
# Windows specific checks
|
|
# Windows committed-memory alarm (CWAgent namespace).
# Enters ALARM when the Average "Memory % Committed Bytes In Use" is greater
# than var.threshold-MemoryCommittedPct for 2 consecutive 900-second periods.
resource "aws_cloudwatch_metric_alarm" "ec2-MemoryCommittedPct" {
  # Created only when the external OS lookup reports "Windows".
  count = data.external.ec2-os.result.os == "Windows" ? 1 : 0

  # --- identity ---
  alarm_name = "${var.cw-alarm-prefix}:EC2:MemoryCommittedPct:${var.ec2-instance-id}"
  # Description follows the "EC2:<alarm name segment>" convention used by the
  # other alarms in this file (was "EC2:MemoryCommittedBytes", which matched
  # neither the alarm name nor the percentage metric).
  alarm_description = "EC2:MemoryCommittedPct"

  # --- metric being watched ---
  namespace   = "CWAgent"
  metric_name = "Memory % Committed Bytes In Use"
  dimensions = {
    objectname   = "Memory"
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }

  # --- evaluation rule ---
  statistic           = "Average"
  period              = "900"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-MemoryCommittedPct

  # --- notifications ---
  # The "urgent" target receives both ALARM and OK transitions;
  # nothing is triggered on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored instead of producing a diff.
    ignore_changes = [tags]
  }
}
|
|
|
|
# Windows OS-disk free space alarm (CWAgent namespace).
# Enters ALARM when the Average "LogicalDisk % Free Space" of the C: volume is
# less than var.threshold-LogicalDiskFreePct for 2 consecutive 300-second
# periods.
resource "aws_cloudwatch_metric_alarm" "ec2-OsDiskFreePct" {
  # Created only when the external OS lookup reports "Windows".
  count = data.external.ec2-os.result.os == "Windows" ? 1 : 0

  # --- identity ---
  alarm_name        = "${var.cw-alarm-prefix}:EC2:OsDiskFreePct:${var.ec2-instance-id}"
  alarm_description = "EC2:OsDiskFreePct"

  # --- metric being watched ---
  namespace   = "CWAgent"
  metric_name = "LogicalDisk % Free Space"
  dimensions = {
    instance     = "C:"
    objectname   = "LogicalDisk"
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }

  # --- evaluation rule ---
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-LogicalDiskFreePct

  # --- notifications ---
  # The "urgent" target receives both ALARM and OK transitions;
  # nothing is triggered on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored instead of producing a diff.
    ignore_changes = [tags]
  }
}