From d5f2af07c5e01210e9454ec86ea5cd3a4f8c81fd Mon Sep 17 00:00:00 2001
From: xpk
Date: Thu, 10 Nov 2022 14:04:44 +0800
Subject: [PATCH] NEW: external data source for identifying EC2 OS platform

---
 .../Monitoring.EC2/get-os-platform.sh         | 16 +++++
 .../Monitoring.EC2/main.tf                    | 71 +++++++++++++++++++
 .../Monitoring.EC2/variables.tf               |  4 +-
 3 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100755 modules/ManagementGovernance/Monitoring.EC2/get-os-platform.sh

diff --git a/modules/ManagementGovernance/Monitoring.EC2/get-os-platform.sh b/modules/ManagementGovernance/Monitoring.EC2/get-os-platform.sh
new file mode 100755
index 0000000..79aeb3d
--- /dev/null
+++ b/modules/ManagementGovernance/Monitoring.EC2/get-os-platform.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Helper for the Terraform "external" data source: reports the OS family of an EC2 instance.
+# Reads the JSON query {"input": "<instance-id>"} from stdin and prints either
+# {"os": "Windows"} or {"os": "Linux"} to stdout.
+eval "$(jq -r '@sh "id=\(.input)"')"
+
+EC2OS=$(aws ec2 describe-instances --instance-ids "$id" | jq -r '.Reservations[].Instances[].PlatformDetails')
+
+# PlatformDetails can contain spaces ("Red Hat Enterprise Linux") or carry a suffix
+# ("Windows with SQL Server Standard"), so quote the value and match on the prefix.
+if [[ "$EC2OS" == Windows* ]]; then
+  echo '{"os": "Windows"}'
+else
+  echo '{"os": "Linux"}'
+fi
+
diff --git a/modules/ManagementGovernance/Monitoring.EC2/main.tf b/modules/ManagementGovernance/Monitoring.EC2/main.tf
index cd8c891..bcc7e0e 100644
--- a/modules/ManagementGovernance/Monitoring.EC2/main.tf
+++ b/modules/ManagementGovernance/Monitoring.EC2/main.tf
@@ -73,7 +73,17 @@ data "aws_instance" "ec2-instance" {
   instance_id = var.ec2-instance-id
 }
 
+# get instance OS
+data "external" "ec2-os" {
+  program = ["bash", "${path.module}/get-os-platform.sh"]
+  query = {
+    input = var.ec2-instance-id
+  }
+}
+
+# Linux specific checks
 resource "aws_cloudwatch_metric_alarm" "ec2-mem_free" {
+  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0
   alarm_name = "${var.cw-alarm-prefix}:EC2:mem_free:${var.ec2-instance-id}"
   comparison_operator = "LessThanThreshold"
   evaluation_periods = "2"
@@ -99,6 +109,7 @@ resource "aws_cloudwatch_metric_alarm" "ec2-mem_free" {
 }
 
 resource "aws_cloudwatch_metric_alarm" "ec2-swap_free" {
+  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0
   # zero is fine as most ec2 instances are deployed without any swap
   alarm_name = "${var.cw-alarm-prefix}:EC2:swap_free:${var.ec2-instance-id}"
   comparison_operator = "LessThanThreshold"
@@ -146,6 +157,7 @@ data "external" "disk-device" {
 }
 
 resource "aws_cloudwatch_metric_alarm" "ec2-disk_free" {
+  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0
   alarm_name = "${var.cw-alarm-prefix}:EC2:disk_free:${var.ec2-instance-id}"
   comparison_operator = "LessThanThreshold"
   evaluation_periods = "2"
@@ -175,6 +187,7 @@ resource "aws_cloudwatch_metric_alarm" "ec2-disk_free" {
 
 
 resource "aws_cloudwatch_metric_alarm" "ec2-disk_inodes_free" {
+  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0
   alarm_name = "${var.cw-alarm-prefix}:EC2:disk_inodes_free:${var.ec2-instance-id}"
   comparison_operator = "LessThanThreshold"
   evaluation_periods = "2"
@@ -203,6 +216,7 @@ resource "aws_cloudwatch_metric_alarm" "ec2-disk_inodes_free" {
 }
 
 resource "aws_cloudwatch_metric_alarm" "ec2-processes_total" {
+  count = data.external.ec2-os.result.os == "Linux" ? 1 : 0
   alarm_name = "${var.cw-alarm-prefix}:EC2:processes_total:${var.ec2-instance-id}"
   comparison_operator = "GreaterThanThreshold"
   evaluation_periods = "2"
@@ -225,4 +239,61 @@ resource "aws_cloudwatch_metric_alarm" "ec2-processes_total" {
   lifecycle {
     ignore_changes = [tags]
   }
+}
+
+
+# Windows specific checks
+resource "aws_cloudwatch_metric_alarm" "ec2-MemoryCommittedPct" {
+  count = data.external.ec2-os.result.os == "Windows" ? 1 : 0
+  alarm_name = "${var.cw-alarm-prefix}:EC2:MemoryCommittedPct:${var.ec2-instance-id}"
+  comparison_operator = "GreaterThanThreshold"
+  evaluation_periods = "2"
+  metric_name = "Memory % Committed Bytes In Use"
+  period = "900"
+  statistic = "Average"
+  threshold = var.threshold-MemoryCommittedPct
+  alarm_description = "EC2:MemoryCommittedBytes"
+  namespace = "CWAgent"
+  insufficient_data_actions = []
+  actions_enabled = var.actions-enabled
+  alarm_actions = [var.sns-targets.alarm-actions-urgent]
+  ok_actions = [var.sns-targets.alarm-actions-urgent]
+  dimensions = {
+    objectname = "Memory"
+    InstanceId = var.ec2-instance-id
+    ImageId = data.aws_instance.ec2-instance.ami
+    InstanceType = data.aws_instance.ec2-instance.instance_type
+  }
+  tags = var.default-tags
+  lifecycle {
+    ignore_changes = [tags]
+  }
+}
+
+resource "aws_cloudwatch_metric_alarm" "ec2-OsDiskFreePct" {
+  count = data.external.ec2-os.result.os == "Windows" ? 1 : 0
+  alarm_name = "${var.cw-alarm-prefix}:EC2:OsDiskFreePct:${var.ec2-instance-id}"
+  comparison_operator = "LessThanThreshold"
+  evaluation_periods = "2"
+  metric_name = "LogicalDisk % Free Space"
+  period = "300"
+  statistic = "Average"
+  threshold = var.threshold-LogicalDiskFreePct
+  alarm_description = "EC2:OsDiskFreePct"
+  namespace = "CWAgent"
+  insufficient_data_actions = []
+  actions_enabled = var.actions-enabled
+  alarm_actions = [var.sns-targets.alarm-actions-urgent]
+  ok_actions = [var.sns-targets.alarm-actions-urgent]
+  dimensions = {
+    instance = "C:"
+    objectname = "LogicalDisk"
+    InstanceId = var.ec2-instance-id
+    ImageId = data.aws_instance.ec2-instance.ami
+    InstanceType = data.aws_instance.ec2-instance.instance_type
+  }
+  tags = var.default-tags
+  lifecycle {
+    ignore_changes = [tags]
+  }
 }
\ No newline at end of file
diff --git a/modules/ManagementGovernance/Monitoring.EC2/variables.tf b/modules/ManagementGovernance/Monitoring.EC2/variables.tf
index 0e501f8..41aae1c 100644
--- a/modules/ManagementGovernance/Monitoring.EC2/variables.tf
+++ b/modules/ManagementGovernance/Monitoring.EC2/variables.tf
@@ -10,4 +10,6 @@ variable "threshold-mem_free" {}
 variable "threshold-swap_free" {}
 variable "threshold-disk_free" {}
 variable "threshold-disk_inodes_free" {}
-variable "threshold-processes_total" {}
\ No newline at end of file
+variable "threshold-processes_total" {}
+variable "threshold-MemoryCommittedPct" {}
+variable "threshold-LogicalDiskFreePct" {}
\ No newline at end of file