# -----------------------------------------------------------------------------
# CloudWatch alarms for a single EC2 instance (var.ec2-instance-id).
#
# Conventions used by every alarm in this file:
#   * alarm_name        = "<var.cw-alarm-prefix>:EC2:<check>:<instance id>"
#   * alarm_description = "EC2:<check>" (same <check> segment as the name)
#   * the same target from var.sns-targets is used for alarm_actions and
#     ok_actions, so both the ALARM and the OK transition trigger it
#   * insufficient_data_actions is empty
#   * tags are set from var.default-tags at creation and later tag changes are
#     ignored (lifecycle.ignore_changes)
#   * integer arguments (period, evaluation_periods) are written as number
#     literals rather than quoted strings
# -----------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# AWS/EC2 namespace checks (created for every instance, no OS condition)
# ---------------------------------------------------------------------------

# System status check: alarms when the maximum of StatusCheckFailed_System is
# above 0 in a single 300 s period.
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_System" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:StatusCheckFailed_System:${var.ec2-instance-id}"
  alarm_description = "EC2:StatusCheckFailed_System"

  namespace   = "AWS/EC2"
  metric_name = "StatusCheckFailed_System"
  dimensions = {
    InstanceId = var.ec2-instance-id
  }

  statistic           = "Maximum"
  period              = 300
  evaluation_periods  = 1
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-emergency]
  ok_actions                = [var.sns-targets.alarm-actions-emergency]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}

# Instance status check: alarms when the maximum of StatusCheckFailed_Instance
# is above 0 in a single 300 s period.
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_Instance" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:StatusCheckFailed_Instance:${var.ec2-instance-id}"
  alarm_description = "EC2:StatusCheckFailed_Instance"

  namespace   = "AWS/EC2"
  metric_name = "StatusCheckFailed_Instance"
  dimensions = {
    InstanceId = var.ec2-instance-id
  }

  statistic           = "Maximum"
  period              = 300
  evaluation_periods  = 1
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-emergency]
  ok_actions                = [var.sns-targets.alarm-actions-emergency]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}

# CPU utilisation: alarms when the 300 s average stays above
# var.threshold-CPUUtilization for 6 consecutive periods (30 minutes).
# Missing data points are treated as not breaching.
resource "aws_cloudwatch_metric_alarm" "ec2-CPUUtilization" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:CPUUtilization:${var.ec2-instance-id}"
  alarm_description = "EC2:CPUUtilization"

  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"
  dimensions = {
    InstanceId = var.ec2-instance-id
  }

  statistic           = "Average"
  period              = 300
  evaluation_periods  = 6
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-CPUUtilization
  treat_missing_data  = "notBreaching"

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}

# ---------------------------------------------------------------------------
# CWAgent namespace checks
# ---------------------------------------------------------------------------

# Instance lookup; its `ami` and `instance_type` attributes supply the ImageId
# and InstanceType dimensions of the CWAgent alarms below.
data "aws_instance" "ec2-instance" {
  instance_id = var.ec2-instance-id
}

# Determine the instance OS by running get-os-platform.sh (via bash) with the
# instance id passed as `input`. The alarms below read `result.os` and compare
# it against "Linux" and "Windows".
data "external" "ec2-os" {
  program = ["bash", "${path.module}/get-os-platform.sh"]

  query = {
    input = var.ec2-instance-id
  }
}

# ---------------------------------------------------------------------------
# Linux specific checks
# ---------------------------------------------------------------------------

# The default CloudWatch agent configuration uses the mem_used_percent metric,
# so the older mem_free alarm is kept only as a commented-out reference.
/*
resource "aws_cloudwatch_metric_alarm" "ec2-mem_free" {
  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0

  alarm_name                = "${var.cw-alarm-prefix}:EC2:mem_free:${var.ec2-instance-id}"
  comparison_operator       = "LessThanThreshold"
  evaluation_periods        = "2"
  metric_name               = "mem_free"
  period                    = "900"
  statistic                 = "Average"
  threshold                 = var.threshold-mem_free
  alarm_description         = "EC2:mem_free"
  namespace                 = "CWAgent"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-standard]
  ok_actions                = [var.sns-targets.alarm-actions-standard]

  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}
*/

# Memory usage (Linux only): alarms when the 900 s average of mem_used_percent
# is above var.threshold-mem_used_percent for 2 consecutive periods.
resource "aws_cloudwatch_metric_alarm" "ec2-mem_used_percent" {
  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0

  alarm_name        = "${var.cw-alarm-prefix}:EC2:mem_used_percent:${var.ec2-instance-id}"
  alarm_description = "EC2:mem_used_percent"

  namespace   = "CWAgent"
  metric_name = "mem_used_percent"
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }

  statistic           = "Average"
  period              = 900
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-mem_used_percent

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-standard]
  ok_actions                = [var.sns-targets.alarm-actions-standard]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}

# The default CloudWatch agent configuration uses the swap_used_percent metric,
# so the older swap_free alarm is kept only as a commented-out reference.
/*
resource "aws_cloudwatch_metric_alarm" "ec2-swap_free" {
  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0

  # zero is fine as most ec2 instances are deployed without any swap
  alarm_name                = "${var.cw-alarm-prefix}:EC2:swap_free:${var.ec2-instance-id}"
  comparison_operator       = "LessThanThreshold"
  evaluation_periods        = "2"
  threshold                 = var.threshold-swap_free
  alarm_description         = "EC2:swap_free"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-standard]
  ok_actions                = [var.sns-targets.alarm-actions-standard]

  metric_query {
    id = "m1"

    metric {
      metric_name = "swap_free"
      namespace   = "CWAgent"
      period      = 900
      stat        = "Average"

      dimensions = {
        InstanceId   = var.ec2-instance-id
        ImageId      = data.aws_instance.ec2-instance.ami
        InstanceType = data.aws_instance.ec2-instance.instance_type
      }
    }
  }

  metric_query {
    id          = "e1"
    expression  = "IF(m1==0, ${var.threshold-swap_free}, m1)"
    label       = "swap_free_if_not_zero"
    return_data = "true"
  }

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}
*/

# Disk-metric dimensions: runs get-cwagent-dimensions.sh (via bash) with the
# instance id passed as `input`. The returned map is used verbatim as the
# `dimensions` of the disk alarms below; an empty map disables those alarms.
data "external" "cw-dimensions" {
  program = ["bash", "${path.module}/get-cwagent-dimensions.sh"]

  query = {
    input = var.ec2-instance-id
  }
}

# Swap usage (Linux only): alarms when the 900 s average of swap_used_percent
# is above var.threshold-swap_used_percent for 2 consecutive periods.
resource "aws_cloudwatch_metric_alarm" "ec2-swap_used_percent" {
  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0

  alarm_name        = "${var.cw-alarm-prefix}:EC2:swap_used_percent:${var.ec2-instance-id}"
  alarm_description = "EC2:swap_used_percent"

  namespace   = "CWAgent"
  metric_name = "swap_used_percent"
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }

  statistic           = "Average"
  period              = 900
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-swap_used_percent

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}

# The default CloudWatch agent configuration uses the disk_used_percent metric,
# so the older disk_free alarm is kept only as a commented-out reference.
/*
resource "aws_cloudwatch_metric_alarm" "ec2-disk_free" {
  count = data.external.ec2-os.result.os == "Linux" && length(data.external.cw-dimensions.result) > 0 ? 1 : 0

  alarm_name                = "${var.cw-alarm-prefix}:EC2:disk_free:${var.ec2-instance-id}"
  comparison_operator       = "LessThanThreshold"
  evaluation_periods        = "2"
  metric_name               = "disk_free"
  period                    = "900"
  statistic                 = "Average"
  threshold                 = var.threshold-disk_free
  alarm_description         = "EC2:disk_free"
  namespace                 = "CWAgent"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  dimensions                = data.external.cw-dimensions.result

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}
*/

# Disk usage (Linux only, and only when the dimension lookup returned
# something): alarms when the 900 s average of disk_used_percent is above
# var.threshold-disk_used_percentage for 2 consecutive periods.
resource "aws_cloudwatch_metric_alarm" "ec2-disk_used_percent" {
  count = data.external.ec2-os.result.os == "Linux" && length(data.external.cw-dimensions.result) > 0 ? 1 : 0

  alarm_name        = "${var.cw-alarm-prefix}:EC2:disk_used_percent:${var.ec2-instance-id}"
  alarm_description = "EC2:disk_used_percent"

  namespace   = "CWAgent"
  metric_name = "disk_used_percent"
  dimensions  = data.external.cw-dimensions.result

  statistic           = "Average"
  period              = 900
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-disk_used_percentage

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}

# Free inodes (Linux only, and only when the dimension lookup returned
# something): alarms when the 300 s average of disk_inodes_free is below
# var.threshold-disk_inodes_free for 2 consecutive periods.
resource "aws_cloudwatch_metric_alarm" "ec2-disk_inodes_free" {
  count = data.external.ec2-os.result.os == "Linux" && length(data.external.cw-dimensions.result) > 0 ? 1 : 0

  alarm_name        = "${var.cw-alarm-prefix}:EC2:disk_inodes_free:${var.ec2-instance-id}"
  alarm_description = "EC2:disk_inodes_free"

  namespace   = "CWAgent"
  metric_name = "disk_inodes_free"
  dimensions  = data.external.cw-dimensions.result

  # Previous hand-built dimension map, kept for reference:
  /*
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
    device       = data.external.disk-device.result.device
    fstype       = data.external.disk-device.result.fstype
    path         = "/"
  }
  */

  statistic           = "Average"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-disk_inodes_free

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}

# Process count (Linux only): alarms when the 900 s average of processes_total
# is above var.threshold-processes_total for 2 consecutive periods.
# Note: the processes metric is not published by the default CloudWatch agent
# configuration.
resource "aws_cloudwatch_metric_alarm" "ec2-processes_total" {
  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0

  alarm_name        = "${var.cw-alarm-prefix}:EC2:processes_total:${var.ec2-instance-id}"
  alarm_description = "EC2:processes_total"

  namespace   = "CWAgent"
  metric_name = "processes_total"
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }

  statistic           = "Average"
  period              = 900
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-processes_total

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}

# ---------------------------------------------------------------------------
# Windows specific checks
# ---------------------------------------------------------------------------

# Committed memory (Windows only): alarms when the 900 s average of
# "Memory % Committed Bytes In Use" is above var.threshold-MemoryCommittedPct
# for 2 consecutive periods.
resource "aws_cloudwatch_metric_alarm" "ec2-MemoryCommittedPct" {
  count = data.external.ec2-os.result.os == "Windows" ? 1 : 0

  alarm_name = "${var.cw-alarm-prefix}:EC2:MemoryCommittedPct:${var.ec2-instance-id}"
  # The description uses the same check name as alarm_name (the metric is a
  # percentage, not a byte count), matching the other alarms in this file.
  alarm_description = "EC2:MemoryCommittedPct"

  namespace   = "CWAgent"
  metric_name = "Memory % Committed Bytes In Use"
  dimensions = {
    objectname   = "Memory"
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }

  statistic           = "Average"
  period              = 900
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-MemoryCommittedPct

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}

# OS disk free space (Windows only): alarms when the 300 s average of
# "LogicalDisk % Free Space" for the "C:" instance is below
# var.threshold-LogicalDiskFreePct for 2 consecutive periods.
resource "aws_cloudwatch_metric_alarm" "ec2-OsDiskFreePct" {
  count = data.external.ec2-os.result.os == "Windows" ? 1 : 0

  alarm_name        = "${var.cw-alarm-prefix}:EC2:OsDiskFreePct:${var.ec2-instance-id}"
  alarm_description = "EC2:OsDiskFreePct"

  namespace   = "CWAgent"
  metric_name = "LogicalDisk % Free Space"
  dimensions = {
    instance     = "C:"
    objectname   = "LogicalDisk"
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }

  statistic           = "Average"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-LogicalDiskFreePct

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    ignore_changes = [tags]
  }
}