NEW: cloudwatch modules for various resource monitoring
This commit is contained in:
parent
f43ef2bb3e
commit
4438ecbcd5
5
modules/ManagementGovernance/Monitoring.ALB/README.md
Normal file
5
modules/ManagementGovernance/Monitoring.ALB/README.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# Monitoring module for BEA
|
||||||
|
This module deploys the default cloudwatch metric monitoring
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
The Terraform `lifecycle` block ignores changes to `tags` to speed up subsequent Terraform runs. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
6
modules/ManagementGovernance/Monitoring.ALB/list-alb-targetgroups.sh
Executable file
6
modules/ManagementGovernance/Monitoring.ALB/list-alb-targetgroups.sh
Executable file
@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash
# Helper for a Terraform "external" data source.
#
# Input  (stdin):  JSON object with key "lb" holding a load balancer ARN.
# Output (stdout): JSON object {"result": "<arn> <arn> ..."} with the target
#                  group ARNs of that load balancer, sorted and joined by a
#                  single space.
#
# Abort on any failing command (including inside pipelines) so that an AWS CLI
# error surfaces as a non-zero exit instead of an empty result string.
set -euo pipefail

# Import the "lb" key of the query into a shell variable; @sh shell-quotes it.
eval "$(jq -r '@sh "lb=\(.lb)"')"

# The query is quoted so the shell cannot glob-expand "[*]"; tr replaces the
# GNU-specific sed '\t' / '\n' escapes used to split the tab-separated output.
RESULTS=$(aws elbv2 describe-target-groups \
  --load-balancer-arn "$lb" \
  --query 'TargetGroups[*].TargetGroupArn' \
  --output text \
  --no-cli-pager | tr '\t' '\n' | sort | xargs)

jq -n --arg result "$RESULTS" '{"result":$result}'
|
||||||
|
|
31
modules/ManagementGovernance/Monitoring.ALB/main.tf
Normal file
31
modules/ManagementGovernance/Monitoring.ALB/main.tf
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# Runs list-alb-targetgroups.sh to look up the target groups attached to
# var.load-balancer. The script returns them as one space-separated string
# under the "result" key.
data "external" "alb-targetgroups" {
  # path.module resolves to this module's directory, so the script is found
  # regardless of which directory the calling configuration is run from.
  program = ["bash", "${path.module}/list-alb-targetgroups.sh"]

  query = {
    lb = var.load-balancer
  }
}
|
||||||
|
|
||||||
|
# One HealthyHostCount alarm per target group reported by the
# alb-targetgroups external data source. The alarm fires when the 300-second
# Minimum of HealthyHostCount is below var.threshold-HealthHostCountMin.
resource "aws_cloudwatch_metric_alarm" "alb-HealthyHostCount" {
  # The data source returns a space-separated string. split() on an empty
  # string yields [""], which would make the index lookups below fail;
  # compact() drops empty elements so a load balancer without target groups
  # simply creates no alarms.
  for_each = toset(compact(split(" ", data.external.alb-targetgroups.result.result)))

  # Elements 1 and 2 of the "/"-split target group ARN
  # (presumably <name>/<id> - confirm against the ARN format).
  alarm_name        = "ALBTG:HealthyHostCount:${split("/", each.value)[1]}/${split("/", each.value)[2]}"
  alarm_description = "ALBTG:HealthyHostCount"

  namespace   = "AWS/ApplicationELB"
  metric_name = "HealthyHostCount"
  dimensions = {
    TargetGroup  = "targetgroup/${split("/", each.value)[1]}/${split("/", each.value)[2]}"
    LoadBalancer = "app/${split("/", var.load-balancer)[2]}/${split("/", var.load-balancer)[3]}"
  }

  statistic           = "Minimum"
  period              = 300
  evaluation_periods  = 1
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-HealthHostCountMin

  # Same notification target on entering ALARM and on returning to OK.
  actions_enabled           = true
  alarm_actions             = [var.alarm-actions-emergency]
  ok_actions                = [var.alarm-actions-emergency]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored.
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
9
modules/ManagementGovernance/Monitoring.ALB/provider.tf
Normal file
9
modules/ManagementGovernance/Monitoring.ALB/provider.tf
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# Version requirements for this module.
terraform {
  # "~> 1.3.0" accepts only 1.3.x releases of Terraform.
  required_version = "~> 1.3.0"

  required_providers {
    # "~> 4.36.1" accepts 4.36.x releases starting at 4.36.1.
    aws = {
      source  = "hashicorp/aws"
      version = "~> 4.36.1"
    }
    # Needed by the data "external" source in main.tf; declared explicitly
    # instead of relying on Terraform's implicit hashicorp/* lookup.
    external = {
      source = "hashicorp/external"
    }
  }
}
|
20
modules/ManagementGovernance/Monitoring.ALB/variables.tf
Normal file
20
modules/ManagementGovernance/Monitoring.ALB/variables.tf
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# variable target-group {}
|
||||||
|
variable "load-balancer" {
  description = "ARN of the load balancer whose target groups are monitored. It is passed to the lookup script and split on \"/\" to build the LoadBalancer dimension."
  type        = string
}

variable "threshold-HealthHostCountMin" {
  description = "The HealthyHostCount alarm fires when the metric is below this value."
  type        = number
}

# The defaults below point at SNS topics in account 843733946244 (ap-east-1);
# override them when deploying elsewhere.
variable "alarm-actions-urgent" {
  description = "SNS topic ARN for urgent notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-urgent"
}

variable "alarm-actions-emergency" {
  description = "SNS topic ARN for emergency notifications; used by the HealthyHostCount alarm."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-emergency"
}

variable "alarm-actions-standard" {
  description = "SNS topic ARN for standard notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-standard"
}

variable "alarm-actions-general" {
  description = "SNS topic ARN for general notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support"
}

variable "default-tags" {
  description = "Tags applied to the alarms when they are created."
  type        = map(string)
}
|
5
modules/ManagementGovernance/Monitoring.EC2/README.md
Normal file
5
modules/ManagementGovernance/Monitoring.EC2/README.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# Monitoring module for BEA
|
||||||
|
This module deploys the default cloudwatch metric monitoring
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
The Terraform `lifecycle` block ignores changes to `tags` to speed up subsequent Terraform runs. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
69
modules/ManagementGovernance/Monitoring.EC2/main.tf
Normal file
69
modules/ManagementGovernance/Monitoring.EC2/main.tf
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# Emergency alarm on the system status check of var.ec2-instance-id: fires
# when the 300-second Maximum of StatusCheckFailed_System is greater than 0.
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_System" {
  # Identity
  alarm_name        = "EC2:StatusCheckFailed_System:${var.ec2-instance-id}"
  alarm_description = "EC2:StatusCheckFailed_System"

  # Metric being watched
  namespace   = "AWS/EC2"
  metric_name = "StatusCheckFailed_System"
  dimensions = {
    InstanceId = var.ec2-instance-id
  }

  # Evaluation
  statistic           = "Maximum"
  period              = "300"
  evaluation_periods  = "1"
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0

  # Same notification target on entering ALARM and on returning to OK
  actions_enabled           = "true"
  alarm_actions             = [var.alarm-actions-emergency]
  ok_actions                = [var.alarm-actions-emergency]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||||||
|
|
||||||
|
# Emergency alarm on the instance status check of var.ec2-instance-id: fires
# when the 300-second Maximum of StatusCheckFailed_Instance is greater than 0.
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_Instance" {
  # Identity
  alarm_name        = "EC2:StatusCheckFailed_Instance:${var.ec2-instance-id}"
  alarm_description = "EC2:StatusCheckFailed_Instance"

  # Metric being watched
  namespace   = "AWS/EC2"
  metric_name = "StatusCheckFailed_Instance"
  dimensions = {
    InstanceId = var.ec2-instance-id
  }

  # Evaluation
  statistic           = "Maximum"
  period              = "300"
  evaluation_periods  = "1"
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0

  # Same notification target on entering ALARM and on returning to OK
  actions_enabled           = "true"
  alarm_actions             = [var.alarm-actions-emergency]
  ok_actions                = [var.alarm-actions-emergency]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||||||
|
|
||||||
|
# Urgent alarm on CPU usage of var.ec2-instance-id: fires when the 300-second
# Average of CPUUtilization is greater than var.threshold-CPUUtilization,
# evaluated over 6 periods.
resource "aws_cloudwatch_metric_alarm" "ec2-CPUUtilization" {
  # Identity
  alarm_name        = "EC2:CPUUtilization:${var.ec2-instance-id}"
  alarm_description = "EC2:CPUUtilization"

  # Metric being watched
  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"
  dimensions = {
    InstanceId = var.ec2-instance-id
  }

  # Evaluation
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "6"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-CPUUtilization
  # Missing data points count as within the threshold
  treat_missing_data = "notBreaching"

  # Same notification target on entering ALARM and on returning to OK
  actions_enabled           = "true"
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
9
modules/ManagementGovernance/Monitoring.EC2/provider.tf
Normal file
9
modules/ManagementGovernance/Monitoring.EC2/provider.tf
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# Version requirements for this module.
terraform {
  required_providers {
    # "~> 4.36.1" accepts 4.36.x releases starting at 4.36.1.
    aws = {
      version = "~> 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  # "~> 1.3.0" accepts only 1.3.x releases of Terraform.
  required_version = "~> 1.3.0"
}
|
20
modules/ManagementGovernance/Monitoring.EC2/variables.tf
Normal file
20
modules/ManagementGovernance/Monitoring.EC2/variables.tf
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
variable "ec2-instance-id" {
  description = "ID of the EC2 instance to monitor; used as the InstanceId dimension and in the alarm names."
  type        = string
}

# The defaults below point at SNS topics in account 843733946244 (ap-east-1);
# override them when deploying elsewhere.
variable "alarm-actions-urgent" {
  description = "SNS topic ARN for urgent notifications; used by the CPUUtilization alarm."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-urgent"
}

variable "alarm-actions-emergency" {
  description = "SNS topic ARN for emergency notifications; used by the status-check alarms."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-emergency"
}

variable "alarm-actions-standard" {
  description = "SNS topic ARN for standard notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-standard"
}

variable "alarm-actions-general" {
  description = "SNS topic ARN for general notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support"
}

variable "default-tags" {
  description = "Tags applied to the alarms when they are created."
  type        = map(string)
}

variable "threshold-CPUUtilization" {
  description = "The CPUUtilization alarm fires when the metric is greater than this value."
  type        = number
}
|
5
modules/ManagementGovernance/Monitoring.EMR/README.md
Normal file
5
modules/ManagementGovernance/Monitoring.EMR/README.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# Monitoring module for BEA
|
||||||
|
This module deploys the default cloudwatch metric monitoring
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
The Terraform `lifecycle` block ignores changes to `tags` to speed up subsequent Terraform runs. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
45
modules/ManagementGovernance/Monitoring.EMR/main.tf
Normal file
45
modules/ManagementGovernance/Monitoring.EMR/main.tf
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# Standard-severity alarm on pending applications of EMR cluster
# var.job-flow-id: fires when the 1800-second Average of AppsPending is
# greater than var.threshold-AppsPending.
resource "aws_cloudwatch_metric_alarm" "emr-AppsPending" {
  # Identity
  alarm_name        = "EMR:AppsPending:${var.job-flow-id}"
  alarm_description = "EMR:AppsPending"

  # Metric being watched
  namespace   = "AWS/ElasticMapReduce"
  metric_name = "AppsPending"
  dimensions = {
    JobFlowId = var.job-flow-id
  }

  # Evaluation
  statistic           = "Average"
  period              = "1800"
  evaluation_periods  = "1"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-AppsPending

  # Same notification target on entering ALARM and on returning to OK
  actions_enabled           = "true"
  alarm_actions             = [var.alarm-actions-standard]
  ok_actions                = [var.alarm-actions-standard]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||||||
|
|
||||||
|
# Urgent alarm on remaining capacity of EMR cluster var.job-flow-id: fires
# when the 3600-second Average of CapacityRemainingGB is less than
# var.threshold-CapacityRemainingGB.
resource "aws_cloudwatch_metric_alarm" "emr-CapacityRemainingGB" {
  # Identity
  alarm_name        = "EMR:CapacityRemainingGB:${var.job-flow-id}"
  alarm_description = "EMR:CapacityRemainingGB"

  # Metric being watched
  namespace   = "AWS/ElasticMapReduce"
  metric_name = "CapacityRemainingGB"
  dimensions = {
    JobFlowId = var.job-flow-id
  }

  # Evaluation
  statistic           = "Average"
  period              = "3600"
  evaluation_periods  = "1"
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-CapacityRemainingGB

  # Same notification target on entering ALARM and on returning to OK
  actions_enabled           = "true"
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
9
modules/ManagementGovernance/Monitoring.EMR/provider.tf
Normal file
9
modules/ManagementGovernance/Monitoring.EMR/provider.tf
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# Version requirements for this module.
terraform {
  required_providers {
    # "~> 4.36.1" accepts 4.36.x releases starting at 4.36.1.
    aws = {
      version = "~> 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  # "~> 1.3.0" accepts only 1.3.x releases of Terraform.
  required_version = "~> 1.3.0"
}
|
21
modules/ManagementGovernance/Monitoring.EMR/variables.tf
Normal file
21
modules/ManagementGovernance/Monitoring.EMR/variables.tf
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
variable "job-flow-id" {
  description = "EMR job flow (cluster) ID to monitor; used as the JobFlowId dimension and in the alarm names."
  type        = string
}

variable "threshold-AppsPending" {
  description = "The AppsPending alarm fires when the metric is greater than this value."
  type        = number
}

variable "threshold-CapacityRemainingGB" {
  description = "The CapacityRemainingGB alarm fires when the metric is less than this value."
  type        = number
}

# The defaults below point at SNS topics in account 843733946244 (ap-east-1);
# override them when deploying elsewhere.
variable "alarm-actions-urgent" {
  description = "SNS topic ARN for urgent notifications; used by the CapacityRemainingGB alarm."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-urgent"
}

variable "alarm-actions-emergency" {
  description = "SNS topic ARN for emergency notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-emergency"
}

variable "alarm-actions-standard" {
  description = "SNS topic ARN for standard notifications; used by the AppsPending alarm."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-standard"
}

variable "alarm-actions-general" {
  description = "SNS topic ARN for general notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support"
}

variable "default-tags" {
  description = "Tags applied to the alarms when they are created."
  type        = map(string)
}
|
5
modules/ManagementGovernance/Monitoring.NLB/README.md
Normal file
5
modules/ManagementGovernance/Monitoring.NLB/README.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# Monitoring module for BEA
|
||||||
|
This module deploys the default cloudwatch metric monitoring
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
The Terraform `lifecycle` block ignores changes to `tags` to speed up subsequent Terraform runs. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
6
modules/ManagementGovernance/Monitoring.NLB/list-nlb-targetgroups.sh
Executable file
6
modules/ManagementGovernance/Monitoring.NLB/list-nlb-targetgroups.sh
Executable file
@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash
# Helper for a Terraform "external" data source.
#
# Input  (stdin):  JSON object with key "lb" holding a load balancer ARN.
# Output (stdout): JSON object {"result": "<arn> <arn> ..."} with the target
#                  group ARNs of that load balancer, sorted and joined by a
#                  single space.
#
# Abort on any failing command (including inside pipelines) so that an AWS CLI
# error surfaces as a non-zero exit instead of an empty result string.
set -euo pipefail

# Import the "lb" key of the query into a shell variable; @sh shell-quotes it.
eval "$(jq -r '@sh "lb=\(.lb)"')"

# The query is quoted so the shell cannot glob-expand "[*]"; tr replaces the
# GNU-specific sed '\t' / '\n' escapes used to split the tab-separated output.
RESULTS=$(aws elbv2 describe-target-groups \
  --load-balancer-arn "$lb" \
  --query 'TargetGroups[*].TargetGroupArn' \
  --output text \
  --no-cli-pager | tr '\t' '\n' | sort | xargs)

jq -n --arg result "$RESULTS" '{"result":$result}'
|
||||||
|
|
31
modules/ManagementGovernance/Monitoring.NLB/main.tf
Normal file
31
modules/ManagementGovernance/Monitoring.NLB/main.tf
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# Runs list-nlb-targetgroups.sh to look up the target groups attached to
# var.load-balancer. The script returns them as one space-separated string
# under the "result" key.
data "external" "nlb-targetgroups" {
  # path.module resolves to this module's directory, so the script is found
  # regardless of which directory the calling configuration is run from.
  program = ["bash", "${path.module}/list-nlb-targetgroups.sh"]

  query = {
    lb = var.load-balancer
  }
}
|
||||||
|
|
||||||
|
# One HealthyHostCount alarm per target group reported by the
# nlb-targetgroups external data source. The alarm fires when the 300-second
# Minimum of HealthyHostCount is below var.threshold-HealthHostCountMin.
resource "aws_cloudwatch_metric_alarm" "nlb-HealthyHostCount" {
  # The data source returns a space-separated string. split() on an empty
  # string yields [""], which would make the index lookups below fail;
  # compact() drops empty elements so a load balancer without target groups
  # simply creates no alarms.
  for_each = toset(compact(split(" ", data.external.nlb-targetgroups.result.result)))

  # Elements 1 and 2 of the "/"-split target group ARN
  # (presumably <name>/<id> - confirm against the ARN format).
  alarm_name        = "NLBTG:HealthyHostCount:${split("/", each.value)[1]}/${split("/", each.value)[2]}"
  alarm_description = "NLBTG:HealthyHostCount"

  namespace   = "AWS/NetworkELB"
  metric_name = "HealthyHostCount"
  dimensions = {
    TargetGroup  = "targetgroup/${split("/", each.value)[1]}/${split("/", each.value)[2]}"
    LoadBalancer = "net/${split("/", var.load-balancer)[2]}/${split("/", var.load-balancer)[3]}"
  }

  statistic           = "Minimum"
  period              = 300
  evaluation_periods  = 1
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-HealthHostCountMin

  # Same notification target on entering ALARM and on returning to OK.
  actions_enabled           = true
  alarm_actions             = [var.alarm-actions-emergency]
  ok_actions                = [var.alarm-actions-emergency]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored.
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
9
modules/ManagementGovernance/Monitoring.NLB/provider.tf
Normal file
9
modules/ManagementGovernance/Monitoring.NLB/provider.tf
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# Version requirements for this module.
terraform {
  # "~> 1.3.0" accepts only 1.3.x releases of Terraform.
  required_version = "~> 1.3.0"

  required_providers {
    # "~> 4.36.1" accepts 4.36.x releases starting at 4.36.1.
    aws = {
      source  = "hashicorp/aws"
      version = "~> 4.36.1"
    }
    # Needed by the data "external" source in main.tf; declared explicitly
    # instead of relying on Terraform's implicit hashicorp/* lookup.
    external = {
      source = "hashicorp/external"
    }
  }
}
|
20
modules/ManagementGovernance/Monitoring.NLB/variables.tf
Normal file
20
modules/ManagementGovernance/Monitoring.NLB/variables.tf
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# variable target-group {}
|
||||||
|
variable "load-balancer" {
  description = "ARN of the load balancer whose target groups are monitored. It is passed to the lookup script and split on \"/\" to build the LoadBalancer dimension."
  type        = string
}

variable "threshold-HealthHostCountMin" {
  description = "The HealthyHostCount alarm fires when the metric is below this value."
  type        = number
}

# The defaults below point at SNS topics in account 843733946244 (ap-east-1);
# override them when deploying elsewhere.
variable "alarm-actions-urgent" {
  description = "SNS topic ARN for urgent notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-urgent"
}

variable "alarm-actions-emergency" {
  description = "SNS topic ARN for emergency notifications; used by the HealthyHostCount alarm."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-emergency"
}

variable "alarm-actions-standard" {
  description = "SNS topic ARN for standard notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-standard"
}

variable "alarm-actions-general" {
  description = "SNS topic ARN for general notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support"
}

variable "default-tags" {
  description = "Tags applied to the alarms when they are created."
  type        = map(string)
}
|
5
modules/ManagementGovernance/Monitoring.RDS/README.md
Normal file
5
modules/ManagementGovernance/Monitoring.RDS/README.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# Monitoring module for BEA
|
||||||
|
This module deploys the default cloudwatch metric monitoring
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
The Terraform `lifecycle` block ignores changes to `tags` to speed up subsequent Terraform runs. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
91
modules/ManagementGovernance/Monitoring.RDS/main.tf
Normal file
91
modules/ManagementGovernance/Monitoring.RDS/main.tf
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
# Urgent alarm on CPU usage of RDS instance var.rds-instance-name: fires when
# the 3600-second Average of CPUUtilization is greater than
# var.threshold-CpuUtilization.
resource "aws_cloudwatch_metric_alarm" "rds-cpu" {
  # Identity
  alarm_name        = "RDS:CpuUtilization:${var.rds-instance-name}"
  alarm_description = "RDS:CpuUtilization"

  # Metric being watched
  namespace   = "AWS/RDS"
  metric_name = "CPUUtilization"
  dimensions = {
    DBInstanceIdentifier = var.rds-instance-name
  }

  # Evaluation
  statistic           = "Average"
  period              = "3600"
  evaluation_periods  = "1"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-CpuUtilization

  # Same notification target on entering ALARM and on returning to OK
  actions_enabled           = "true"
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||||||
|
|
||||||
|
# Urgent alarm on free storage of RDS instance var.rds-instance-name: fires
# when the 3600-second Average of FreeStorageSpace is less than
# var.threshold-FreeStorageSpace.
resource "aws_cloudwatch_metric_alarm" "rds-storage" {
  # Identity
  alarm_name        = "RDS:FreeStorageSpace:${var.rds-instance-name}"
  alarm_description = "RDS:FreeStorageSpace"

  # Metric being watched
  namespace   = "AWS/RDS"
  metric_name = "FreeStorageSpace"
  dimensions = {
    DBInstanceIdentifier = var.rds-instance-name
  }

  # Evaluation
  statistic           = "Average"
  period              = "3600"
  evaluation_periods  = "1"
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-FreeStorageSpace

  # Same notification target on entering ALARM and on returning to OK
  actions_enabled           = "true"
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||||||
|
|
||||||
|
# Urgent alarm on freeable memory of RDS instance var.rds-instance-name:
# fires when the 3600-second Average of FreeableMemory is less than
# var.threshold-FreeableMemory.
resource "aws_cloudwatch_metric_alarm" "rds-memory" {
  # Identity
  alarm_name        = "RDS:FreeableMemory:${var.rds-instance-name}"
  alarm_description = "RDS:FreeableMemory"

  # Metric being watched
  namespace   = "AWS/RDS"
  metric_name = "FreeableMemory"
  dimensions = {
    DBInstanceIdentifier = var.rds-instance-name
  }

  # Evaluation
  statistic           = "Average"
  period              = "3600"
  evaluation_periods  = "1"
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-FreeableMemory

  # Same notification target on entering ALARM and on returning to OK
  actions_enabled           = "true"
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||||||
|
|
||||||
|
# Urgent alarm on disk queue depth of RDS instance var.rds-instance-name:
# fires when the 300-second Average of DiskQueueDepth is greater than
# var.threshold-DiskQueueDepth.
resource "aws_cloudwatch_metric_alarm" "rds-DiskQueueDepth" {
  # Identity
  alarm_name        = "RDS:DiskQueueDepth:${var.rds-instance-name}"
  alarm_description = "RDS:DiskQueueDepth"

  # Metric being watched
  namespace   = "AWS/RDS"
  metric_name = "DiskQueueDepth"
  dimensions = {
    DBInstanceIdentifier = var.rds-instance-name
  }

  # Evaluation
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "1"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-DiskQueueDepth

  # Same notification target on entering ALARM and on returning to OK
  actions_enabled           = "true"
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
9
modules/ManagementGovernance/Monitoring.RDS/provider.tf
Normal file
9
modules/ManagementGovernance/Monitoring.RDS/provider.tf
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# Version requirements for this module.
terraform {
  required_providers {
    # "~> 4.36.1" accepts 4.36.x releases starting at 4.36.1.
    aws = {
      version = "~> 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  # "~> 1.3.0" accepts only 1.3.x releases of Terraform.
  required_version = "~> 1.3.0"
}
|
23
modules/ManagementGovernance/Monitoring.RDS/variables.tf
Normal file
23
modules/ManagementGovernance/Monitoring.RDS/variables.tf
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
variable "rds-instance-name" {
  description = "RDS instance identifier to monitor; used as the DBInstanceIdentifier dimension and in the alarm names."
  type        = string
}

# The defaults below point at SNS topics in account 843733946244 (ap-east-1);
# override them when deploying elsewhere.
variable "alarm-actions-urgent" {
  description = "SNS topic ARN for urgent notifications; used by all RDS alarms in this module."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-urgent"
}

variable "alarm-actions-emergency" {
  description = "SNS topic ARN for emergency notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-emergency"
}

variable "alarm-actions-standard" {
  description = "SNS topic ARN for standard notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-standard"
}

variable "alarm-actions-general" {
  description = "SNS topic ARN for general notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support"
}

variable "default-tags" {
  description = "Tags applied to the alarms when they are created."
  type        = map(string)
}

variable "threshold-FreeableMemory" {
  description = "The FreeableMemory alarm fires when the metric is less than this value."
  type        = number
}

variable "threshold-CpuUtilization" {
  description = "The CPUUtilization alarm fires when the metric is greater than this value."
  type        = number
}

variable "threshold-FreeStorageSpace" {
  description = "The FreeStorageSpace alarm fires when the metric is less than this value."
  type        = number
}

variable "threshold-DiskQueueDepth" {
  description = "The DiskQueueDepth alarm fires when the metric is greater than this value."
  type        = number
}
|
5
modules/ManagementGovernance/Monitoring.Redis/README.md
Normal file
5
modules/ManagementGovernance/Monitoring.Redis/README.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# Monitoring module for BEA
|
||||||
|
This module deploys the default cloudwatch metric monitoring
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
The Terraform `lifecycle` block ignores changes to `tags` to speed up subsequent Terraform runs. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
|
75
modules/ManagementGovernance/Monitoring.Redis/main.tf
Normal file
75
modules/ManagementGovernance/Monitoring.Redis/main.tf
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
# Urgent alarm on engine CPU of cache cluster var.redis-cluster-id: fires
# when the 3600-second Average of EngineCPUUtilization is greater than
# var.threshold-EngineCPUUtilization.
resource "aws_cloudwatch_metric_alarm" "redis-EngineCPUUtilization" {
  # Identity
  alarm_name        = "Redis:EngineCPUUtilization:${var.redis-cluster-id}"
  alarm_description = "Redis:EngineCPUUtilization"

  # Metric being watched
  namespace   = "AWS/ElastiCache"
  metric_name = "EngineCPUUtilization"
  dimensions = {
    CacheClusterId = var.redis-cluster-id
  }

  # Evaluation
  statistic           = "Average"
  period              = "3600"
  evaluation_periods  = "1"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-EngineCPUUtilization

  # Same notification target on entering ALARM and on returning to OK
  actions_enabled           = "true"
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||||||
|
|
||||||
|
# Urgent alarm on memory usage of cache cluster var.redis-cluster-id: fires
# when the 3600-second Average of DatabaseMemoryUsagePercentage is greater
# than var.threshold-DatabaseMemoryUsagePercentage.
resource "aws_cloudwatch_metric_alarm" "redis-DatabaseMemoryUsagePercentage" {
  # Identity
  alarm_name        = "Redis:DatabaseMemoryUsagePercentage:${var.redis-cluster-id}"
  alarm_description = "Redis:DatabaseMemoryUsagePercentage"

  # Metric being watched
  namespace   = "AWS/ElastiCache"
  metric_name = "DatabaseMemoryUsagePercentage"
  dimensions = {
    CacheClusterId = var.redis-cluster-id
  }

  # Evaluation
  statistic           = "Average"
  period              = "3600"
  evaluation_periods  = "1"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-DatabaseMemoryUsagePercentage

  # Same notification target on entering ALARM and on returning to OK
  actions_enabled           = "true"
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
data aws_elasticache_cluster redis-cluster {
|
||||||
|
cluster_id = var.redis-cluster-id
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
# Standard-severity alarm on the cache hit rate of cache cluster
# var.redis-cluster-id: fires when the 1800-second Average of CacheHitRate is
# less than var.threshold-CacheHitRate.
resource "aws_cloudwatch_metric_alarm" "redis-CacheHitRate" {
  # Disabled per-node fan-out, kept for reference:
  # for_each = toset(data.aws_elasticache_cluster.redis-cluster.cache_nodes.*.id)

  # Identity
  alarm_name        = "Redis:CacheHitRate:${var.redis-cluster-id}"
  alarm_description = "Redis:CacheHitRate"

  # Metric being watched
  namespace   = "AWS/ElastiCache"
  metric_name = "CacheHitRate"
  dimensions = {
    CacheClusterId = var.redis-cluster-id
    # CacheNodeId = each.value
  }

  # Evaluation
  statistic           = "Average"
  period              = "1800"
  evaluation_periods  = "1"
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-CacheHitRate

  # Same notification target on entering ALARM and on returning to OK
  actions_enabled           = "true"
  alarm_actions             = [var.alarm-actions-standard]
  ok_actions                = [var.alarm-actions-standard]
  insufficient_data_actions = []

  # Tags are applied at creation; later tag differences are ignored
  tags = var.default-tags
  lifecycle {
    ignore_changes = [tags]
  }
}
|
@ -0,0 +1,9 @@
|
|||||||
|
# Version requirements for this module.
terraform {
  required_providers {
    # "~> 4.36.1" accepts 4.36.x releases starting at 4.36.1.
    aws = {
      version = "~> 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  # "~> 1.3.0" accepts only 1.3.x releases of Terraform.
  required_version = "~> 1.3.0"
}
|
22
modules/ManagementGovernance/Monitoring.Redis/variables.tf
Normal file
22
modules/ManagementGovernance/Monitoring.Redis/variables.tf
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
variable "redis-cluster-id" {
  description = "ElastiCache cluster ID to monitor; used as the CacheClusterId dimension and in the alarm names."
  type        = string
}

# The defaults below point at SNS topics in account 843733946244 (ap-east-1);
# override them when deploying elsewhere.
variable "alarm-actions-urgent" {
  description = "SNS topic ARN for urgent notifications; used by the EngineCPUUtilization and DatabaseMemoryUsagePercentage alarms."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-urgent"
}

variable "alarm-actions-emergency" {
  description = "SNS topic ARN for emergency notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-emergency"
}

variable "alarm-actions-standard" {
  description = "SNS topic ARN for standard notifications; used by the CacheHitRate alarm."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-standard"
}

variable "alarm-actions-general" {
  description = "SNS topic ARN for general notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support"
}

variable "default-tags" {
  description = "Tags applied to the alarms when they are created."
  type        = map(string)
}

variable "threshold-EngineCPUUtilization" {
  description = "The EngineCPUUtilization alarm fires when the metric is greater than this value."
  type        = number
}

variable "threshold-DatabaseMemoryUsagePercentage" {
  description = "The DatabaseMemoryUsagePercentage alarm fires when the metric is greater than this value."
  type        = number
}

variable "threshold-CacheHitRate" {
  description = "The CacheHitRate alarm fires when the metric is less than this value."
  type        = number
}
|
Loading…
Reference in New Issue
Block a user