# NOTE(review): this physical line has lost its line breaks and part of its
# content. Read left to right it holds:
#   1. `locals { alarm-message = <` - the value is cut off right after `<`
#      (presumably the start of a heredoc; TODO restore the message text from
#      version control rather than reconstructing it).
#   2. `0 ? 1 : 0` followed by the attributes of a mem_used_percent alarm in the
#      "CWAgent" namespace. Its `resource ... {` header and the beginning of its
#      `count` expression are not present here.
#   3. data "external" "cw-dimensions": runs get-cwagent-dimensions.sh from this
#      module's directory through bash, passing the instance id and the
#      target-account access key / secret key / session token as the query.
#   4. A commented-out module "cw-dimensions" (`/* module returns blank ... */`).
#   5. The opening of resource "ec2-swap_used_percent"; its `count` ternary is
#      completed by the `1 : 0` at the start of the next line.
# A `#` comment runs to the end of the physical line, so in this collapsed form
# the text will not parse as intended until the original line breaks are restored.
locals { # alarm-message limited to 1024 characters alarm-message = < 0 ? 1 : 0 alarm_name = "${var.settings.mem_used_percent.ecccode}-EC2_${var.ec2-instance-id}-mem_used_percent" comparison_operator = var.settings.mem_used_percent.comparison_operator evaluation_periods = var.settings.mem_used_percent.evaluation_periods metric_name = "mem_used_percent" period = var.settings.mem_used_percent.period statistic = var.settings.mem_used_percent.statistic threshold = var.settings.mem_used_percent.threshold # alarm_description = "EC2:mem_used_percent" alarm_description = local.alarm-message namespace = "CWAgent" insufficient_data_actions = [] actions_enabled = var.actions-enabled alarm_actions = [var.settings.mem_used_percent.action] ok_actions = [var.settings.mem_used_percent.action] dimensions = { InstanceId = var.ec2-instance-id ImageId = data.aws_instance.ec2-instance.ami InstanceType = data.aws_instance.ec2-instance.instance_type } } data "external" "cw-dimensions" { program = ["bash", "${path.module}/get-cwagent-dimensions.sh"] query = { iid = var.ec2-instance-id access_key = var.target-account-ak secret_key = var.target-account-sk session_token = var.target-account-token } } /* module returns blank module "cw-dimensions" { source = "../../util/awscli" access_key = var.target-account-ak aws_cli_commands = "cloudwatch list-metrics --namespace CWAgent --metric-name disk_inodes_free --dimensions Name=InstanceId,Value=${var.ec2-instance-id} Name=path,Value=/ --query Metrics[].Dimensions[] | jq '.[] | {(.Name):(.Value)}' | jq -s 'add'" secret_key = var.target-account-sk session_token = var.target-account-token } */ resource "aws_cloudwatch_metric_alarm" "ec2-swap_used_percent" { count = module.ec2_os.awscliout[0] != "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 
1 : 0
  # The `1 : 0` above completes the `count` ternary of resource
  # "ec2-swap_used_percent", whose header sits on the previous physical line.
  # Swap usage alarm on the CWAgent "swap_used_percent" metric.
  alarm_name          = "${var.settings.swap_used_percent.ecccode}-EC2_${var.ec2-instance-id}-swap_used_percent"
  comparison_operator = var.settings.swap_used_percent.comparison_operator
  evaluation_periods  = var.settings.swap_used_percent.evaluation_periods
  metric_name         = "swap_used_percent"
  period              = var.settings.swap_used_percent.period
  statistic           = var.settings.swap_used_percent.statistic
  threshold           = var.settings.swap_used_percent.threshold
  # alarm_description = "EC2:swap_used_percent"
  alarm_description         = local.alarm-message
  namespace                 = "CWAgent"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  # The same action target is notified on both ALARM and OK transitions.
  alarm_actions = [var.settings.swap_used_percent.action]
  ok_actions    = [var.settings.swap_used_percent.action]
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }
}

# Disk usage (warning level) on the CWAgent "disk_used_percent" metric.
# Dimensions come from the external "cw-dimensions" lookup.
# NOTE(review): the warn and crit alarms share the "-disk_used_percent" name
# suffix, so their names differ only if the two ecccode values differ - confirm.
resource "aws_cloudwatch_metric_alarm" "ec2-disk_used_percent_warn" {
  # Created only for non-Windows instances when the lookup returned at least
  # one dimension. The previous `!= null` comparison could not detect an empty
  # map, which would have produced an alarm with no dimensions.
  count = module.ec2_os.awscliout[0] != "Windows" && length(data.external.cw-dimensions.result) > 0 ? 1 : 0

  alarm_name          = "${var.settings.disk_used_percent_warn.ecccode}-EC2_${var.ec2-instance-id}-disk_used_percent"
  comparison_operator = var.settings.disk_used_percent_warn.comparison_operator
  evaluation_periods  = var.settings.disk_used_percent_warn.evaluation_periods
  metric_name         = "disk_used_percent"
  period              = var.settings.disk_used_percent_warn.period
  statistic           = var.settings.disk_used_percent_warn.statistic
  threshold           = var.settings.disk_used_percent_warn.threshold
  # alarm_description = "EC2:disk_used_percent"
  alarm_description         = local.alarm-message
  namespace                 = "CWAgent"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.disk_used_percent_warn.action]
  ok_actions                = [var.settings.disk_used_percent_warn.action]
  dimensions                = data.external.cw-dimensions.result
}

# Disk usage (critical level) on the same metric, using the
# disk_used_percent_crit settings.
resource "aws_cloudwatch_metric_alarm" "ec2-disk_used_percent_crit" {
  # Same guard as the warn alarm: non-Windows and a non-empty dimension lookup.
  count = module.ec2_os.awscliout[0] != "Windows" && length(data.external.cw-dimensions.result) > 0 ? 1 : 0

  alarm_name          = "${var.settings.disk_used_percent_crit.ecccode}-EC2_${var.ec2-instance-id}-disk_used_percent"
  comparison_operator = var.settings.disk_used_percent_crit.comparison_operator
  evaluation_periods  = var.settings.disk_used_percent_crit.evaluation_periods
  metric_name         = "disk_used_percent"
  period              = var.settings.disk_used_percent_crit.period
  statistic           = var.settings.disk_used_percent_crit.statistic
  threshold           = var.settings.disk_used_percent_crit.threshold
  # alarm_description = "EC2:disk_used_percent"
  alarm_description         = local.alarm-message
  namespace                 = "CWAgent"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.disk_used_percent_crit.action]
  ok_actions                = [var.settings.disk_used_percent_crit.action]
  dimensions                = data.external.cw-dimensions.result
}

# Free inode alarm on the CWAgent "disk_inodes_free" metric.
resource "aws_cloudwatch_metric_alarm" "ec2-disk_inodes_free" {
  # Same guard as the disk usage alarms: non-Windows and a non-empty lookup.
  count = module.ec2_os.awscliout[0] != "Windows" && length(data.external.cw-dimensions.result) > 0 ? 1 : 0

  alarm_name          = "${var.settings.disk_inodes_free.ecccode}-EC2_${var.ec2-instance-id}-disk_inodes_free"
  comparison_operator = var.settings.disk_inodes_free.comparison_operator
  evaluation_periods  = var.settings.disk_inodes_free.evaluation_periods
  metric_name         = "disk_inodes_free"
  period              = var.settings.disk_inodes_free.period
  statistic           = var.settings.disk_inodes_free.statistic
  threshold           = var.settings.disk_inodes_free.threshold
  # alarm_description = "EC2:disk_inodes_free"
  alarm_description         = local.alarm-message
  namespace                 = "CWAgent"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.disk_inodes_free.action]
  ok_actions                = [var.settings.disk_inodes_free.action]
  dimensions                = data.external.cw-dimensions.result
}

# process metric not published by default cw agent config
resource "aws_cloudwatch_metric_alarm" "ec2-processes_total" {
  # Non-Windows only, and only when the agent-detection module returned output.
  count = module.ec2_os.awscliout[0] != "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0

  alarm_name          = "${var.settings.processes_total.ecccode}-EC2_${var.ec2-instance-id}-processes_total"
  comparison_operator = var.settings.processes_total.comparison_operator
  evaluation_periods  = var.settings.processes_total.evaluation_periods
  metric_name         = "processes_total"
  period              = var.settings.processes_total.period
  statistic           = var.settings.processes_total.statistic
  threshold           = var.settings.processes_total.threshold
  # alarm_description = "EC2:processes_total"
  alarm_description         = local.alarm-message
  namespace                 = "CWAgent"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.processes_total.action]
  ok_actions                = [var.settings.processes_total.action]
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }
}

# Combined network error alarm. Expression e1 evaluates to 1 when either m1
# (net_err_in) or m2 (net_err_out) exceeds its configured threshold, and the
# alarm compares e1 against 0 with GreaterThanThreshold.
resource "aws_cloudwatch_metric_alarm" "ec2-net_err" {
  count = module.ec2_os.awscliout[0] != "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0

  alarm_name          = "${var.settings.net_err_in.ecccode}-EC2_${var.ec2-instance-id}-net_err"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = var.settings.net_err_in.evaluation_periods
  threshold           = 0
  # alarm_description = "EC2:net_err_in or EC2:net_err_out exceeds threshold"
  alarm_description         = local.alarm-message
  insufficient_data_actions = []
  # NOTE(review): hard-coded to false, unlike the other alarms which use
  # var.actions-enabled - confirm this is intentional.
  actions_enabled    = false
  alarm_actions      = [var.settings.net_err_in.action]
  ok_actions         = [var.settings.net_err_in.action]
  treat_missing_data = "notBreaching"

  metric_query {
    id          = "e1"
    expression  = "IF(m1 > ${var.settings.net_err_in.threshold} OR m2 > ${var.settings.net_err_out.threshold}, 1, 0)"
    label       = "net_err_exceeds_threshold"
    return_data = "true"
  }

  metric_query {
    id = "m1"

    metric {
      metric_name = "net_err_in"
      namespace   = "CWAgent"
      period      = var.settings.net_err_in.period
      stat        = var.settings.net_err_in.statistic
      dimensions = {
        InstanceId   = var.ec2-instance-id
        ImageId      = data.aws_instance.ec2-instance.ami
        InstanceType = data.aws_instance.ec2-instance.instance_type
        # NOTE(review): only the "eth0" interface is watched - confirm this
        # matches the interface name on the target instances.
        interface = "eth0"
      }
    }
  }

  metric_query {
    id = "m2"

    metric {
      metric_name = "net_err_out"
      namespace   = "CWAgent"
      period      = var.settings.net_err_out.period
      stat        = var.settings.net_err_out.statistic
      dimensions = {
        InstanceId   = var.ec2-instance-id
        ImageId      = data.aws_instance.ec2-instance.ami
        InstanceType = data.aws_instance.ec2-instance.instance_type
        interface    = "eth0"
      }
    }
  }
}

# Optional alarm on the AWS/EC2 "NetworkIn" metric; created only when
# var.settings.NetworkIn.monitor is present and true.
resource "aws_cloudwatch_metric_alarm" "ec2-NetworkIn" {
  count = try(var.settings.NetworkIn.monitor, false) ? 1 : 0

  alarm_name          = "${var.settings.NetworkIn.ecccode}-EC2_${var.ec2-instance-id}-NetworkIn"
  comparison_operator = var.settings.NetworkIn.comparison_operator
  evaluation_periods  = var.settings.NetworkIn.evaluation_periods
  metric_name         = "NetworkIn"
  period              = var.settings.NetworkIn.period
  statistic           = var.settings.NetworkIn.statistic
  threshold           = var.settings.NetworkIn.threshold
  # alarm_description = "EC2:NetworkIn"
  alarm_description         = local.alarm-message
  namespace                 = "AWS/EC2"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.NetworkIn.action]
  ok_actions                = [var.settings.NetworkIn.action]
  dimensions = {
    InstanceId = var.ec2-instance-id
  }
}

# Optional alarm on the AWS/EC2 "NetworkOut" metric.
resource "aws_cloudwatch_metric_alarm" "ec2-NetworkOut" {
  # Prefer NetworkOut's own `monitor` flag. The earlier code read
  # NetworkIn.monitor here, so that value is kept as a fallback for
  # configurations that only set the NetworkIn flag.
  count = try(var.settings.NetworkOut.monitor, var.settings.NetworkIn.monitor, false) ? 1 : 0

  alarm_name          = "${var.settings.NetworkOut.ecccode}-EC2_${var.ec2-instance-id}-NetworkOut"
  comparison_operator = var.settings.NetworkOut.comparison_operator
  evaluation_periods  = var.settings.NetworkOut.evaluation_periods
  metric_name         = "NetworkOut"
  period              = var.settings.NetworkOut.period
  statistic           = var.settings.NetworkOut.statistic
  threshold           = var.settings.NetworkOut.threshold
  # alarm_description = "EC2:NetworkOut"
  alarm_description         = local.alarm-message
  namespace                 = "AWS/EC2"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.NetworkOut.action]
  ok_actions                = [var.settings.NetworkOut.action]
  dimensions = {
    InstanceId = var.ec2-instance-id
  }
}

# Windows specific checks

# Committed-memory alarm on the CWAgent "Memory % Committed Bytes In Use" metric.
resource "aws_cloudwatch_metric_alarm" "ec2-MemoryCommittedPct" {
  # Windows only, and only when the agent-detection module returned output.
  count = module.ec2_os.awscliout[0] == "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0

  alarm_name          = "${var.settings.MemoryCommittedPct.ecccode}-EC2_${var.ec2-instance-id}-MemoryCommittedPct"
  comparison_operator = var.settings.MemoryCommittedPct.comparison_operator
  evaluation_periods  = var.settings.MemoryCommittedPct.evaluation_periods
  metric_name         = "Memory % Committed Bytes In Use"
  period              = var.settings.MemoryCommittedPct.period
  statistic           = var.settings.MemoryCommittedPct.statistic
  threshold           = var.settings.MemoryCommittedPct.threshold
  # alarm_description = "EC2:MemoryCommittedBytes"
  alarm_description         = local.alarm-message
  namespace                 = "CWAgent"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.MemoryCommittedPct.action]
  ok_actions                = [var.settings.MemoryCommittedPct.action]
  dimensions = {
    objectname   = "Memory"
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }
}

# Free-space alarm on the CWAgent "LogicalDisk % Free Space" metric for the
# "C:" instance only.
resource "aws_cloudwatch_metric_alarm" "ec2-LogicalDiskFreePct" {
  count = module.ec2_os.awscliout[0] == "Windows" && length(module.detect_cloudwatch_agent.awscliout) > 0 ? 1 : 0

  alarm_name          = "${var.settings.LogicalDiskFreePct.ecccode}-EC2_${var.ec2-instance-id}-LogicalDiskFreePct"
  comparison_operator = var.settings.LogicalDiskFreePct.comparison_operator
  evaluation_periods  = var.settings.LogicalDiskFreePct.evaluation_periods
  metric_name         = "LogicalDisk % Free Space"
  period              = var.settings.LogicalDiskFreePct.period
  statistic           = var.settings.LogicalDiskFreePct.statistic
  threshold           = var.settings.LogicalDiskFreePct.threshold
  # alarm_description = "EC2:OsDiskFreePct"
  alarm_description         = local.alarm-message
  namespace                 = "CWAgent"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.settings.LogicalDiskFreePct.action]
  ok_actions                = [var.settings.LogicalDiskFreePct.action]
  dimensions = {
    instance     = "C:"
    objectname   = "LogicalDisk"
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }
}