UPD: added EC2 monitoring with cwagent
This commit is contained in:
parent
b3ba6f2441
commit
282df3c08b
@ -67,3 +67,145 @@ resource "aws_cloudwatch_metric_alarm" "ec2-CPUUtilization" {
|
||||
ignore_changes = [tags]
|
||||
}
|
||||
}
|
||||
|
||||
# Alarms on metrics published by the CloudWatch agent (namespace "CWAgent")
|
||||
# Looks up the existing EC2 instance identified by var.ec2-instance-id.
# The CWAgent alarms in this file read its `ami` and `instance_type`
# attributes to build their metric dimensions.
data "aws_instance" "ec2-instance" {
|
||||
instance_id = var.ec2-instance-id
|
||||
}
|
||||
|
||||
# Low-free-memory alarm for the instance var.ec2-instance-id, based on the
# CloudWatch agent metric CWAgent/mem_free.
# Goes to ALARM when the 900-second average is below var.threshold-mem_free
# for 2 evaluation periods; ALARM and OK transitions both notify the
# "standard" action target.
resource "aws_cloudwatch_metric_alarm" "ec2-mem_free" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:mem_free:${var.ec2-instance-id}"
  alarm_description = "EC2:mem_free"

  # Metric selection.
  namespace   = "CWAgent"
  metric_name = "mem_free"
  statistic   = "Average"
  period      = 900

  # Evaluation.
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-mem_free
  evaluation_periods  = 2
  # Periods without datapoints count as healthy, so an instance that is
  # not reporting this metric never raises this alarm.
  treat_missing_data = "notBreaching"

  # Notifications.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-standard]
  ok_actions                = [var.alarm-actions-standard]
  insufficient_data_actions = []

  # NOTE(review): the alarm only matches if the agent publishes mem_free with
  # exactly these three dimensions (presumably via append_dimensions in the
  # agent config) - confirm against the deployed agent configuration.
  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
  }

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored by Terraform.
    ignore_changes = [tags]
  }
}
|
||||
|
||||
# Low-free-swap alarm for the instance var.ec2-instance-id, based on the
# CloudWatch agent metric CWAgent/swap_free.
#
# Most EC2 instances are deployed without any swap, in which case swap_free
# is constantly 0 and a plain "less than threshold" alarm would always fire.
# The alarm therefore evaluates a metric-math expression (e1) instead of the
# raw metric (m1): when m1 is 0 the expression yields the threshold itself,
# which is not below the threshold, so the alarm stays OK.
resource "aws_cloudwatch_metric_alarm" "ec2-swap_free" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:swap_free:${var.ec2-instance-id}"
  alarm_description = "EC2:swap_free"

  # Evaluation (applied to the expression e1 below).
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-swap_free
  evaluation_periods  = 2
  # Periods without datapoints count as healthy.
  treat_missing_data = "notBreaching"

  # Notifications.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-standard]
  ok_actions                = [var.alarm-actions-standard]
  insufficient_data_actions = []

  # m1: raw 900-second average of swap_free; input to e1 only.
  metric_query {
    id = "m1"

    metric {
      namespace   = "CWAgent"
      metric_name = "swap_free"
      stat        = "Average"
      period      = 900

      # NOTE(review): must match the dimension set the agent actually
      # publishes for swap_free - confirm against the agent configuration.
      dimensions = {
        InstanceId   = var.ec2-instance-id
        ImageId      = data.aws_instance.ec2-instance.ami
        InstanceType = data.aws_instance.ec2-instance.instance_type
      }
    }
  }

  # e1: the series the alarm evaluates (the only query returning data).
  # Maps "no swap at all" (m1 == 0) to the threshold value so it never breaches.
  metric_query {
    id          = "e1"
    expression  = "IF(m1==0, ${var.threshold-swap_free}, m1)"
    label       = "swap_free_if_not_zero"
    return_data = true
  }

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored by Terraform.
    ignore_changes = [tags]
  }
}
|
||||
|
||||
# Low-free-disk-space alarm for the instance var.ec2-instance-id, based on
# the CloudWatch agent metric CWAgent/disk_free for the filesystem mounted
# at "/" with fstype "xfs".
# Goes to ALARM when the 900-second average is below var.threshold-disk_free
# for 2 evaluation periods; ALARM and OK transitions both notify the
# "urgent" action target.
resource "aws_cloudwatch_metric_alarm" "ec2-disk_free" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:disk_free:${var.ec2-instance-id}"
  alarm_description = "EC2:disk_free"

  # Metric selection.
  namespace   = "CWAgent"
  metric_name = "disk_free"
  statistic   = "Average"
  period      = 900

  # Evaluation.
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-disk_free
  evaluation_periods  = 2
  # Periods without datapoints count as healthy.
  treat_missing_data = "notBreaching"

  # Notifications.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
    # No `device` dimension on purpose: the metric carries the device name as
    # seen by the OS, not the AWS block-device name, so neither
    # one(data.aws_instance.ec2-instance.root_block_device[*].device_name)
    # nor an empty string works here.
    # NOTE(review): without `device`, this alarm presumably only matches if
    # the agent publishes disk metrics without that dimension (e.g.
    # drop_device in the agent config) - confirm; with
    # treat_missing_data = "notBreaching" a mismatch would silently stay OK.
    fstype = "xfs"
    path   = "/"
  }

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored by Terraform.
    ignore_changes = [tags]
  }
}
|
||||
|
||||
|
||||
# Low-free-inodes alarm for the instance var.ec2-instance-id, based on the
# CloudWatch agent metric CWAgent/disk_inodes_free for the filesystem
# mounted at "/" with fstype "xfs".
# Goes to ALARM when the 300-second average is below
# var.threshold-disk_inodes_free for 2 evaluation periods; ALARM and OK
# transitions both notify the "urgent" action target.
resource "aws_cloudwatch_metric_alarm" "ec2-disk_inodes_free" {
  alarm_name        = "${var.cw-alarm-prefix}:EC2:disk_inodes_free:${var.ec2-instance-id}"
  alarm_description = "EC2:disk_inodes_free"

  # Metric selection.
  namespace   = "CWAgent"
  metric_name = "disk_inodes_free"
  statistic   = "Average"
  period      = 300

  # Evaluation.
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-disk_inodes_free
  evaluation_periods  = 2
  # Periods without datapoints count as healthy.
  treat_missing_data = "notBreaching"

  # Notifications.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  dimensions = {
    InstanceId   = var.ec2-instance-id
    ImageId      = data.aws_instance.ec2-instance.ami
    InstanceType = data.aws_instance.ec2-instance.instance_type
    # No `device` dimension on purpose: the metric carries the device name as
    # seen by the OS, not the AWS block-device name, so neither
    # one(data.aws_instance.ec2-instance.root_block_device[*].device_name)
    # nor an empty string works here.
    # NOTE(review): without `device`, this alarm presumably only matches if
    # the agent publishes disk metrics without that dimension (e.g.
    # drop_device in the agent config) - confirm; with
    # treat_missing_data = "notBreaching" a mismatch would silently stay OK.
    fstype = "xfs"
    path   = "/"
  }

  tags = var.default-tags

  lifecycle {
    # Later changes to tags are ignored by Terraform.
    ignore_changes = [tags]
  }
}
|
@ -1,22 +1,26 @@
|
||||
variable cw-alarm-prefix {}
|
||||
variable actions-enabled {}
|
||||
variable ec2-instance-id {}
|
||||
variable alarm-actions-urgent {
|
||||
variable "cw-alarm-prefix" {}
|
||||
variable "actions-enabled" {}
|
||||
variable "ec2-instance-id" {}
|
||||
variable "alarm-actions-urgent" {
|
||||
type = string
|
||||
default = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-urgent"
|
||||
}
|
||||
variable alarm-actions-emergency {
|
||||
variable "alarm-actions-emergency" {
|
||||
type = string
|
||||
default = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-emergency"
|
||||
}
|
||||
variable alarm-actions-standard {
|
||||
variable "alarm-actions-standard" {
|
||||
type = string
|
||||
default = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-standard"
|
||||
}
|
||||
variable alarm-actions-general {
|
||||
variable "alarm-actions-general" {
|
||||
type = string
|
||||
default = "arn:aws:sns:ap-east-1:843733946244:rackspace-support"
|
||||
}
|
||||
variable default-tags {}
|
||||
variable "default-tags" {}
|
||||
|
||||
variable threshold-CPUUtilization {}
|
||||
variable "threshold-CPUUtilization" {}
|
||||
variable "threshold-mem_free" {}
|
||||
variable "threshold-swap_free" {}
|
||||
variable "threshold-disk_free" {}
|
||||
variable "threshold-disk_inodes_free" {}
|
@ -17,7 +17,7 @@ module "nlb-targetgroups" {
|
||||
resource "aws_cloudwatch_metric_alarm" "nlb-HealthyHostCount" {
|
||||
for_each = module.nlb-targetgroups.result-set
|
||||
alarm_name = "${var.cw-alarm-prefix}:NLBTG:HealthyHostCount:${split(":", each.value)[5]}"
|
||||
comparison_operator = "LessThanThreshold"
|
||||
comparison_operator = "LessThanOrEqualToThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "HealthyHostCount"
|
||||
period = "300"
|
||||
|
@ -70,7 +70,7 @@ resource "aws_cloudwatch_metric_alarm" "rds-memory" {
|
||||
resource "aws_cloudwatch_metric_alarm" "rds-DiskQueueDepth" {
|
||||
alarm_name = "${var.cw-alarm-prefix}:RDS:DiskQueueDepth:${var.rds-instance-name}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
evaluation_periods = "2"
|
||||
metric_name = "DiskQueueDepth"
|
||||
period = "300"
|
||||
statistic = "Average"
|
||||
|
Loading…
Reference in New Issue
Block a user