# ---------------------------------------------------------------------------
# Alarms on metrics in the AWS/EC2 namespace
# ---------------------------------------------------------------------------

# Fires when the Maximum of StatusCheckFailed_System over one 300-second
# period is greater than 0. Alarm and OK transitions both notify the
# "emergency" action target.
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_System" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:StatusCheckFailed_System:${var.ec2-instance-id}"
  alarm_description = "EC2:StatusCheckFailed_System"

  namespace   = "AWS/EC2"
  metric_name = "StatusCheckFailed_System"
  dimensions = {
    InstanceId = var.ec2-instance-id
  }
  statistic = "Maximum"
  period    = "300"

  comparison_operator = "GreaterThanThreshold"
  threshold           = 0
  evaluation_periods  = "1"

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-emergency]
  ok_actions                = [var.alarm-actions-emergency]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag drift on the alarm is deliberately not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}

# Same shape as the system status check alarm above, but watching
# StatusCheckFailed_Instance.
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_Instance" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:StatusCheckFailed_Instance:${var.ec2-instance-id}"
  alarm_description = "EC2:StatusCheckFailed_Instance"

  namespace   = "AWS/EC2"
  metric_name = "StatusCheckFailed_Instance"
  dimensions = {
    InstanceId = var.ec2-instance-id
  }
  statistic = "Maximum"
  period    = "300"

  comparison_operator = "GreaterThanThreshold"
  threshold           = 0
  evaluation_periods  = "1"

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-emergency]
  ok_actions                = [var.alarm-actions-emergency]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag drift on the alarm is deliberately not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}

# Fires when the Average CPUUtilization is greater than
# var.threshold-CPUUtilization across 6 evaluation periods of 300 seconds.
# Missing datapoints are treated as not breaching. Notifies the "urgent"
# action target.
resource "aws_cloudwatch_metric_alarm" "ec2-CPUUtilization" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:CPUUtilization:${var.ec2-instance-id}"
  alarm_description = "EC2:CPUUtilization"

  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"
  dimensions = {
    InstanceId = var.ec2-instance-id
  }
  statistic = "Average"
  period    = "300"

  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-CPUUtilization
  evaluation_periods  = "6"
  treat_missing_data  = "notBreaching"

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag drift on the alarm is deliberately not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}

# ---------------------------------------------------------------------------
# Alarms on metrics in the CWAgent namespace
# ---------------------------------------------------------------------------

# Looks up the monitored instance so its AMI id and instance type can be used
# as the ImageId / InstanceType dimensions of the CWAgent metrics below.
data "aws_instance" "ec2-instance" {
  instance_id = var.ec2-instance-id
}

# Fires when the Average of mem_free is less than var.threshold-mem_free
# across 2 evaluation periods of 900 seconds. Notifies the "standard" action
# target.
resource "aws_cloudwatch_metric_alarm" "ec2-mem_free" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:mem_free:${var.ec2-instance-id}"
  alarm_description = "EC2:mem_free"

  namespace   = "CWAgent"
  metric_name = "mem_free"
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }
  statistic = "Average"
  period    = "900"

  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-mem_free
  evaluation_periods  = "2"

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-standard]
  ok_actions                = [var.alarm-actions-standard]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag drift on the alarm is deliberately not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}

# Fires when swap_free is less than var.threshold-swap_free across 2
# evaluation periods, except when swap_free is exactly zero.
#
# Zero is fine, as most EC2 instances are deployed without any swap. The "e1"
# expression therefore replaces a zero reading with the threshold value
# itself, which is not "less than" the threshold and so never breaches.
resource "aws_cloudwatch_metric_alarm" "ec2-swap_free" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:swap_free:${var.ec2-instance-id}"
  alarm_description = "EC2:swap_free"

  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-swap_free
  evaluation_periods  = "2"

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-standard]
  ok_actions                = [var.alarm-actions-standard]
  insufficient_data_actions = []

  # m1: the raw swap_free metric (Average over 900 seconds); only an input to e1.
  metric_query {
    id = "m1"

    metric {
      namespace   = "CWAgent"
      metric_name = "swap_free"
      stat        = "Average"
      period      = 900

      dimensions = {
        InstanceId   = var.ec2-instance-id
        ImageId      = data.aws_instance.ec2-instance.ami
        InstanceType = data.aws_instance.ec2-instance.instance_type
      }
    }
  }

  # e1: the series the alarm evaluates (return_data is set on this query).
  metric_query {
    id          = "e1"
    label       = "swap_free_if_not_zero"
    expression  = "IF(m1==0, ${var.threshold-swap_free}, m1)"
    return_data = "true"
  }

  tags = var.default-tags

  # Tag drift on the alarm is deliberately not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}

# Get the device dimension from the CloudWatch metrics: runs the module's
# get-cwagent-device.sh helper with the instance id passed as "input". The
# "device" and "fstype" keys of its result are used as dimensions by the
# disk alarms.
data "external" "disk-device" {
  program = ["bash", "${path.module}/get-cwagent-device.sh"]

  query = {
    input = var.ec2-instance-id
  }
}
# Fires when the Average of disk_free for path "/" is less than
# var.threshold-disk_free across 2 evaluation periods of 900 seconds.
# The device and fstype dimensions come from data.external.disk-device.
# Notifies the "urgent" action target.
resource "aws_cloudwatch_metric_alarm" "ec2-disk_free" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:disk_free:${var.ec2-instance-id}"
  alarm_description = "EC2:disk_free"

  namespace   = "CWAgent"
  metric_name = "disk_free"
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
    device       = data.external.disk-device.result.device
    fstype       = data.external.disk-device.result.fstype
    path         = "/"
  }
  statistic = "Average"
  period    = "900"

  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-disk_free
  evaluation_periods  = "2"

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag drift on the alarm is deliberately not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}

# Fires when the Average of disk_inodes_free for path "/" is less than
# var.threshold-disk_inodes_free across 2 evaluation periods of 300 seconds.
# The device and fstype dimensions come from data.external.disk-device.
# Notifies the "urgent" action target.
resource "aws_cloudwatch_metric_alarm" "ec2-disk_inodes_free" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:disk_inodes_free:${var.ec2-instance-id}"
  alarm_description = "EC2:disk_inodes_free"

  namespace   = "CWAgent"
  metric_name = "disk_inodes_free"
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
    device       = data.external.disk-device.result.device
    fstype       = data.external.disk-device.result.fstype
    path         = "/"
  }
  statistic = "Average"
  period    = "300"

  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-disk_inodes_free
  evaluation_periods  = "2"

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag drift on the alarm is deliberately not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}

# Fires when the Average of processes_total is greater than
# var.threshold-processes_total across 2 evaluation periods of 900 seconds.
# Notifies the "urgent" action target.
resource "aws_cloudwatch_metric_alarm" "ec2-processes_total" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:processes_total:${var.ec2-instance-id}"
  alarm_description = "EC2:processes_total"

  namespace   = "CWAgent"
  metric_name = "processes_total"
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }
  statistic = "Average"
  period    = "900"

  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-processes_total
  evaluation_periods  = "2"

  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag drift on the alarm is deliberately not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}