UPD: Added more monitoring modules and various enhancements
This commit is contained in:
parent
2af0ff1b1a
commit
b3ba6f2441
@ -1,28 +1,28 @@
|
||||
data external alb-targetgroups {
|
||||
data "external" "alb-targetgroups" {
|
||||
program = ["bash", "../../modules/ManagementGovernance/Monitoring.ALB/list-alb-targetgroups.sh"]
|
||||
query = {
|
||||
lb = var.load-balancer
|
||||
}
|
||||
}
|
||||
|
||||
resource aws_cloudwatch_metric_alarm alb-HealthyHostCount {
|
||||
for_each = toset(split(" ", data.external.alb-targetgroups.result.result))
|
||||
alarm_name = "ALBTG:HealthyHostCount:${split("/", each.value)[1]}/${split("/", each.value)[2]}"
|
||||
comparison_operator = "LessThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "HealthyHostCount"
|
||||
period = "300"
|
||||
statistic = "Minimum"
|
||||
threshold = var.threshold-HealthHostCountMin
|
||||
alarm_description = "ALBTG:HealthyHostCount"
|
||||
namespace = "AWS/ApplicationELB"
|
||||
resource "aws_cloudwatch_metric_alarm" "alb-HealthyHostCount" {
|
||||
for_each = toset(split(" ", data.external.alb-targetgroups.result.result))
|
||||
alarm_name = "${var.cw-alarm-prefix}:ALBTG:HealthyHostCount:${split("/", each.value)[1]}/${split("/", each.value)[2]}"
|
||||
comparison_operator = "LessThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "HealthyHostCount"
|
||||
period = "300"
|
||||
statistic = "Minimum"
|
||||
threshold = var.threshold-HealthHostCountMin
|
||||
alarm_description = "ALBTG:HealthyHostCount"
|
||||
namespace = "AWS/ApplicationELB"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
alarm_actions = [var.alarm-actions-emergency]
|
||||
ok_actions = [var.alarm-actions-emergency]
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-emergency]
|
||||
ok_actions = [var.alarm-actions-emergency]
|
||||
dimensions = {
|
||||
TargetGroup = "targetgroup/${split("/", each.value)[1]}/${split("/", each.value)[2]}"
|
||||
LoadBalancer = "app/${split("/",var.load-balancer)[2]}/${split("/",var.load-balancer)[3]}"
|
||||
TargetGroup = "targetgroup/${split("/", each.value)[1]}/${split("/", each.value)[2]}"
|
||||
LoadBalancer = "app/${split("/", var.load-balancer)[2]}/${split("/", var.load-balancer)[3]}"
|
||||
}
|
||||
tags = var.default-tags
|
||||
lifecycle {
|
||||
|
@ -1,4 +1,5 @@
|
||||
# variable target-group {}
|
||||
variable cw-alarm-prefix {}
|
||||
variable actions-enabled {}
|
||||
variable load-balancer {}
|
||||
variable threshold-HealthHostCountMin {}
|
||||
variable alarm-actions-urgent {
|
||||
|
5
modules/ManagementGovernance/Monitoring.ASG/README.md
Normal file
5
modules/ManagementGovernance/Monitoring.ASG/README.md
Normal file
@ -0,0 +1,5 @@
|
||||
# Monitoring module for BEA
|
||||
This module deploys the default CloudWatch metric monitoring.
|
||||
|
||||
## Notes
|
||||
The Terraform `lifecycle` block ignores changes to tags, which speeds up subsequent Terraform updates. CloudWatch alarm tags cannot be read in the AWS console anyway.
|
22
modules/ManagementGovernance/Monitoring.ASG/main.tf
Normal file
22
modules/ManagementGovernance/Monitoring.ASG/main.tf
Normal file
@ -0,0 +1,22 @@
|
||||
# CloudWatch alarm on the average CPU utilization of one Auto Scaling group.
# It compares the 30-minute average against var.threshold-CPUUtilization over
# three evaluation periods and notifies the same target on ALARM and OK.
resource "aws_cloudwatch_metric_alarm" "asg-CPUUtilization" {
  alarm_name        = "${var.cw-alarm-prefix}:ASG:CPUUtilization:${var.asg-name}"
  alarm_description = "ASG:CPUUtilization"

  # Which metric is watched.
  namespace   = "AWS/EC2"
  metric_name = "CPUUtilization"
  dimensions = {
    AutoScalingGroupName = var.asg-name
  }

  # How the metric is evaluated.
  statistic           = "Average"
  period              = 1800
  evaluation_periods  = 3
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-CPUUtilization

  # Who is notified.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags do not produce a diff for this alarm.
    ignore_changes = [tags]
  }
}
|
9
modules/ManagementGovernance/Monitoring.ASG/provider.tf
Normal file
9
modules/ManagementGovernance/Monitoring.ASG/provider.tf
Normal file
@ -0,0 +1,9 @@
|
||||
# Version pins for this module: Terraform 1.3.x and AWS provider 4.36.x
# (4.36.1 or later patch releases).
terraform {
  required_providers {
    aws = {
      version = "~> 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  required_version = "~> 1.3.0"
}
|
22
modules/ManagementGovernance/Monitoring.ASG/variables.tf
Normal file
22
modules/ManagementGovernance/Monitoring.ASG/variables.tf
Normal file
@ -0,0 +1,22 @@
|
||||
# Input variables for the ASG monitoring module.
# Labels are quoted and every variable carries a type and description so that
# wrong inputs are rejected at plan time instead of inside the alarm resource.

variable "cw-alarm-prefix" {
  description = "Prefix placed at the start of every CloudWatch alarm name created by this module."
  type        = string
}

variable "actions-enabled" {
  description = "Value passed to the alarm's actions_enabled argument."
  type        = bool
}

variable "asg-name" {
  description = "Name of the Auto Scaling group to monitor; used in the alarm name and as the AutoScalingGroupName dimension."
  type        = string
}

# NOTE(review): the defaults below are pinned to account 843733946244 in
# ap-east-1; callers deploying elsewhere must override them.
variable "alarm-actions-urgent" {
  description = "SNS topic ARN for urgent notifications; used for the alarm and OK actions of the CPU alarm."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-urgent"
}

variable "alarm-actions-emergency" {
  description = "SNS topic ARN for emergency notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-emergency"
}

variable "alarm-actions-standard" {
  description = "SNS topic ARN for standard notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-standard"
}

variable "alarm-actions-general" {
  description = "SNS topic ARN for general notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support"
}

variable "default-tags" {
  description = "Tags applied to the alarm when it is created (later tag changes are ignored by the resource lifecycle)."
  type        = map(string)
}

variable "threshold-CPUUtilization" {
  description = "CPUUtilization value above which the ASG CPU alarm is considered breaching."
  type        = number
}
|
@ -1,5 +1,5 @@
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_System" {
|
||||
alarm_name = "EC2:StatusCheckFailed_System:${var.ec2-instance-id}"
|
||||
alarm_name = "${var.cw-alarm-prefix}:EC2:StatusCheckFailed_System:${var.ec2-instance-id}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "StatusCheckFailed_System"
|
||||
@ -9,7 +9,7 @@ resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_System" {
|
||||
alarm_description = "EC2:StatusCheckFailed_System"
|
||||
namespace = "AWS/EC2"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-emergency]
|
||||
ok_actions = [var.alarm-actions-emergency]
|
||||
dimensions = {
|
||||
@ -22,7 +22,7 @@ resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_System" {
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_Instance" {
|
||||
alarm_name = "EC2:StatusCheckFailed_Instance:${var.ec2-instance-id}"
|
||||
alarm_name = "${var.cw-alarm-prefix}:EC2:StatusCheckFailed_Instance:${var.ec2-instance-id}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "StatusCheckFailed_Instance"
|
||||
@ -32,7 +32,7 @@ resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_Instance" {
|
||||
alarm_description = "EC2:StatusCheckFailed_Instance"
|
||||
namespace = "AWS/EC2"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-emergency]
|
||||
ok_actions = [var.alarm-actions-emergency]
|
||||
dimensions = {
|
||||
@ -45,7 +45,7 @@ resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_Instance" {
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-CPUUtilization" {
|
||||
alarm_name = "EC2:CPUUtilization:${var.ec2-instance-id}"
|
||||
alarm_name = "${var.cw-alarm-prefix}:EC2:CPUUtilization:${var.ec2-instance-id}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "6"
|
||||
metric_name = "CPUUtilization"
|
||||
@ -55,7 +55,7 @@ resource "aws_cloudwatch_metric_alarm" "ec2-CPUUtilization" {
|
||||
alarm_description = "EC2:CPUUtilization"
|
||||
namespace = "AWS/EC2"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
treat_missing_data = "notBreaching"
|
||||
|
@ -1,3 +1,5 @@
|
||||
variable cw-alarm-prefix {}
|
||||
variable actions-enabled {}
|
||||
variable ec2-instance-id {}
|
||||
variable alarm-actions-urgent {
|
||||
type = string
|
||||
|
@ -1,5 +1,5 @@
|
||||
resource "aws_cloudwatch_metric_alarm" "emr-AppsPending" {
|
||||
alarm_name = "EMR:AppsPending:${var.job-flow-id}"
|
||||
alarm_name = "${var.cw-alarm-prefix}:EMR:AppsPending:${var.job-flow-id}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "AppsPending"
|
||||
@ -9,7 +9,7 @@ resource "aws_cloudwatch_metric_alarm" "emr-AppsPending" {
|
||||
alarm_description = "EMR:AppsPending"
|
||||
namespace = "AWS/ElasticMapReduce"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-standard]
|
||||
ok_actions = [var.alarm-actions-standard]
|
||||
dimensions = {
|
||||
@ -22,7 +22,7 @@ resource "aws_cloudwatch_metric_alarm" "emr-AppsPending" {
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "emr-CapacityRemainingGB" {
|
||||
alarm_name = "EMR:CapacityRemainingGB:${var.job-flow-id}"
|
||||
alarm_name = "${var.cw-alarm-prefix}:EMR:CapacityRemainingGB:${var.job-flow-id}"
|
||||
comparison_operator = "LessThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "CapacityRemainingGB"
|
||||
@ -32,7 +32,7 @@ resource "aws_cloudwatch_metric_alarm" "emr-CapacityRemainingGB" {
|
||||
alarm_description = "EMR:CapacityRemainingGB"
|
||||
namespace = "AWS/ElasticMapReduce"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
dimensions = {
|
||||
|
@ -1,3 +1,5 @@
|
||||
variable cw-alarm-prefix {}
|
||||
variable actions-enabled {}
|
||||
variable job-flow-id {}
|
||||
variable threshold-AppsPending {}
|
||||
variable threshold-CapacityRemainingGB {}
|
||||
|
5
modules/ManagementGovernance/Monitoring.Kafka/README.md
Normal file
5
modules/ManagementGovernance/Monitoring.Kafka/README.md
Normal file
@ -0,0 +1,5 @@
|
||||
# Monitoring module for BEA
|
||||
This module deploys the default CloudWatch metric monitoring.
|
||||
|
||||
## Notes
|
||||
The Terraform `lifecycle` block ignores changes to tags, which speeds up subsequent Terraform updates. CloudWatch alarm tags cannot be read in the AWS console anyway.
|
22
modules/ManagementGovernance/Monitoring.Kafka/main.tf
Normal file
22
modules/ManagementGovernance/Monitoring.Kafka/main.tf
Normal file
@ -0,0 +1,22 @@
|
||||
# CloudWatch alarm on the ZooKeeperRequestLatencyMsMean metric of one Kafka
# cluster. It compares the 30-minute average against
# var.threshold-ZooKeeperRequestLatencyMsMean over three evaluation periods
# and notifies the same target on ALARM and OK.
resource "aws_cloudwatch_metric_alarm" "Kafka-ZooKeeperRequestLatencyMsMean" {
  alarm_name        = "${var.cw-alarm-prefix}:Kafka:ZooKeeperRequestLatencyMsMean:${var.cluster-name}"
  alarm_description = "Kafka:ZooKeeperRequestLatencyMsMean"

  # Which metric is watched.
  namespace   = "AWS/Kafka"
  metric_name = "ZooKeeperRequestLatencyMsMean"
  dimensions = {
    # The dimension key contains a space, so it has to be quoted.
    "Cluster Name" = var.cluster-name
  }

  # How the metric is evaluated.
  statistic           = "Average"
  period              = 1800
  evaluation_periods  = 3
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-ZooKeeperRequestLatencyMsMean

  # Who is notified.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags do not produce a diff for this alarm.
    ignore_changes = [tags]
  }
}
|
@ -0,0 +1,9 @@
|
||||
# Version pins for this module: Terraform 1.3.x and AWS provider 4.36.x
# (4.36.1 or later patch releases).
terraform {
  required_providers {
    aws = {
      version = "~> 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  required_version = "~> 1.3.0"
}
|
22
modules/ManagementGovernance/Monitoring.Kafka/variables.tf
Normal file
22
modules/ManagementGovernance/Monitoring.Kafka/variables.tf
Normal file
@ -0,0 +1,22 @@
|
||||
# Input variables for the Kafka monitoring module.
# Labels are quoted and every variable carries a type and description so that
# wrong inputs are rejected at plan time instead of inside the alarm resource.

variable "cw-alarm-prefix" {
  description = "Prefix placed at the start of every CloudWatch alarm name created by this module."
  type        = string
}

variable "actions-enabled" {
  description = "Value passed to the alarm's actions_enabled argument."
  type        = bool
}

variable "cluster-name" {
  description = "Name of the Kafka cluster to monitor; used in the alarm name and as the \"Cluster Name\" dimension."
  type        = string
}

# NOTE(review): the defaults below are pinned to account 843733946244 in
# ap-east-1; callers deploying elsewhere must override them.
variable "alarm-actions-urgent" {
  description = "SNS topic ARN for urgent notifications; used for the alarm and OK actions of the ZooKeeper latency alarm."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-urgent"
}

variable "alarm-actions-emergency" {
  description = "SNS topic ARN for emergency notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-emergency"
}

variable "alarm-actions-standard" {
  description = "SNS topic ARN for standard notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-standard"
}

variable "alarm-actions-general" {
  description = "SNS topic ARN for general notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support"
}

variable "default-tags" {
  description = "Tags applied to the alarm when it is created (later tag changes are ignored by the resource lifecycle)."
  type        = map(string)
}

variable "threshold-ZooKeeperRequestLatencyMsMean" {
  description = "ZooKeeperRequestLatencyMsMean value above which the alarm is considered breaching."
  type        = number
}
|
@ -16,7 +16,7 @@ module "nlb-targetgroups" {
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "nlb-HealthyHostCount" {
|
||||
for_each = module.nlb-targetgroups.result-set
|
||||
alarm_name = "NLBTG:HealthyHostCount:${split(":", each.value)[5]}"
|
||||
alarm_name = "${var.cw-alarm-prefix}:NLBTG:HealthyHostCount:${split(":", each.value)[5]}"
|
||||
comparison_operator = "LessThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "HealthyHostCount"
|
||||
@ -26,7 +26,7 @@ resource "aws_cloudwatch_metric_alarm" "nlb-HealthyHostCount" {
|
||||
alarm_description = "NLBTG:HealthyHostCount"
|
||||
namespace = "AWS/NetworkELB"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-emergency]
|
||||
ok_actions = [var.alarm-actions-emergency]
|
||||
dimensions = {
|
||||
|
@ -1,4 +1,5 @@
|
||||
# variable target-group {}
|
||||
variable cw-alarm-prefix {}
|
||||
variable actions-enabled {}
|
||||
variable load-balancer {}
|
||||
variable threshold-HealthHostCountMin {}
|
||||
variable alarm-actions-urgent {
|
||||
|
@ -0,0 +1,5 @@
|
||||
# Monitoring module for BEA
|
||||
This module deploys the default CloudWatch metric monitoring.
|
||||
|
||||
## Notes
|
||||
The Terraform `lifecycle` block ignores changes to tags, which speeds up subsequent Terraform updates. CloudWatch alarm tags cannot be read in the AWS console anyway.
|
98
modules/ManagementGovernance/Monitoring.OpenSearch/main.tf
Normal file
98
modules/ManagementGovernance/Monitoring.OpenSearch/main.tf
Normal file
@ -0,0 +1,98 @@
|
||||
# Identity of the AWS credentials Terraform is running with. Its `id` is used
# as the ClientId dimension of the alarms in this file
# (presumably the AWS account ID — confirm against the provider docs).
data "aws_caller_identity" "this" {}
|
||||
|
||||
# CloudWatch alarm on the average CPU utilization of one OpenSearch domain.
# It compares the 30-minute average against var.threshold-CPUUtilization over
# three evaluation periods and notifies the same target on ALARM and OK.
resource "aws_cloudwatch_metric_alarm" "ES-CPUUtilization" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:CPUUtilization:${var.domain-name}"
  alarm_description = "ES:CPUUtilization"

  # Which metric is watched.
  namespace   = "AWS/ES"
  metric_name = "CPUUtilization"
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # How the metric is evaluated.
  statistic           = "Average"
  period              = 1800
  evaluation_periods  = 3
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-CPUUtilization

  # Who is notified.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags do not produce a diff for this alarm.
    ignore_changes = [tags]
  }
}
|
||||
|
||||
# CloudWatch alarm on the average search latency of one OpenSearch domain.
# It compares the 30-minute average against var.threshold-SearchLatency over
# three evaluation periods and notifies the same target on ALARM and OK.
resource "aws_cloudwatch_metric_alarm" "ES-SearchLatency" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:SearchLatency:${var.domain-name}"
  alarm_description = "ES:SearchLatency"

  # Which metric is watched.
  namespace   = "AWS/ES"
  metric_name = "SearchLatency"
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # How the metric is evaluated.
  statistic           = "Average"
  period              = 1800
  evaluation_periods  = 3
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-SearchLatency

  # Who is notified.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags do not produce a diff for this alarm.
    ignore_changes = [tags]
  }
}
|
||||
|
||||
# CloudWatch alarm on the average indexing latency of one OpenSearch domain.
# It compares the 30-minute average against var.threshold-IndexingLatency over
# three evaluation periods and notifies the same target on ALARM and OK.
resource "aws_cloudwatch_metric_alarm" "ES-IndexingLatency" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:IndexingLatency:${var.domain-name}"
  alarm_description = "ES:IndexingLatency"

  # Which metric is watched.
  namespace   = "AWS/ES"
  metric_name = "IndexingLatency"
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # How the metric is evaluated.
  statistic           = "Average"
  period              = 1800
  evaluation_periods  = 3
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-IndexingLatency

  # Who is notified.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags do not produce a diff for this alarm.
    ignore_changes = [tags]
  }
}
|
||||
|
||||
# CloudWatch alarm on the ClusterStatus.red metric of one OpenSearch domain.
# It compares the 15-minute maximum against a fixed threshold of 0 over two
# evaluation periods and notifies the same target on ALARM and OK.
resource "aws_cloudwatch_metric_alarm" "ES-ClusterStatusRed" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:ClusterStatusRed:${var.domain-name}"
  alarm_description = "At least one primary shard and its replicas aren't allocated to a node."

  # Which metric is watched.
  namespace   = "AWS/ES"
  metric_name = "ClusterStatus.red"
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # How the metric is evaluated: any value above zero counts as breaching.
  statistic           = "Maximum"
  period              = 900
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0

  # Who is notified.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.alarm-actions-urgent]
  ok_actions                = [var.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  lifecycle {
    # Later changes to tags do not produce a diff for this alarm.
    ignore_changes = [tags]
  }
}
|
||||
|
@ -0,0 +1,9 @@
|
||||
# Version pins for this module: Terraform 1.3.x and AWS provider 4.36.x
# (4.36.1 or later patch releases).
terraform {
  required_providers {
    aws = {
      version = "~> 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  required_version = "~> 1.3.0"
}
|
@ -0,0 +1,25 @@
|
||||
# Input variables for the OpenSearch monitoring module.
# Labels are quoted and every variable carries a type and description so that
# wrong inputs are rejected at plan time instead of inside the alarm resources.

variable "cw-alarm-prefix" {
  description = "Prefix placed at the start of every CloudWatch alarm name created by this module."
  type        = string
}

variable "actions-enabled" {
  description = "Value passed to each alarm's actions_enabled argument."
  type        = bool
}

variable "domain-name" {
  description = "Name of the OpenSearch domain to monitor; used in the alarm names and as the DomainName dimension."
  type        = string
}

# NOTE(review): the defaults below are pinned to account 843733946244 in
# ap-east-1; callers deploying elsewhere must override them.
variable "alarm-actions-urgent" {
  description = "SNS topic ARN for urgent notifications; used for the alarm and OK actions of the alarms in this module."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-urgent"
}

variable "alarm-actions-emergency" {
  description = "SNS topic ARN for emergency notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-emergency"
}

variable "alarm-actions-standard" {
  description = "SNS topic ARN for standard notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-standard"
}

variable "alarm-actions-general" {
  description = "SNS topic ARN for general notifications."
  type        = string
  default     = "arn:aws:sns:ap-east-1:843733946244:rackspace-support"
}

variable "default-tags" {
  description = "Tags applied to the alarms when they are created (later tag changes are ignored by the resource lifecycle)."
  type        = map(string)
}

variable "threshold-CPUUtilization" {
  description = "CPUUtilization value above which the CPU alarm is considered breaching."
  type        = number
}

variable "threshold-SearchLatency" {
  description = "SearchLatency value above which the search latency alarm is considered breaching."
  type        = number
}

variable "threshold-IndexingLatency" {
  description = "IndexingLatency value above which the indexing latency alarm is considered breaching."
  type        = number
}

# variable threshold-KibanaHealthyNodes {}
|
@ -1,17 +1,17 @@
|
||||
resource aws_cloudwatch_metric_alarm rds-cpu {
|
||||
alarm_name = "RDS:CpuUtilization:${var.rds-instance-name}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "CPUUtilization"
|
||||
period = "3600"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-CpuUtilization
|
||||
alarm_description = "RDS:CpuUtilization"
|
||||
namespace = "AWS/RDS"
|
||||
resource "aws_cloudwatch_metric_alarm" "rds-cpu" {
|
||||
alarm_name = "${var.cw-alarm-prefix}:RDS:CpuUtilization:${var.rds-instance-name}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "CPUUtilization"
|
||||
period = "3600"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-CpuUtilization
|
||||
alarm_description = "RDS:CpuUtilization"
|
||||
namespace = "AWS/RDS"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
dimensions = {
|
||||
DBInstanceIdentifier = var.rds-instance-name
|
||||
}
|
||||
@ -21,20 +21,20 @@ resource aws_cloudwatch_metric_alarm rds-cpu {
|
||||
}
|
||||
}
|
||||
|
||||
resource aws_cloudwatch_metric_alarm rds-storage {
|
||||
alarm_name = "RDS:FreeStorageSpace:${var.rds-instance-name}"
|
||||
comparison_operator = "LessThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "FreeStorageSpace"
|
||||
period = "3600"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-FreeStorageSpace
|
||||
alarm_description = "RDS:FreeStorageSpace"
|
||||
namespace = "AWS/RDS"
|
||||
resource "aws_cloudwatch_metric_alarm" "rds-storage" {
|
||||
alarm_name = "${var.cw-alarm-prefix}:RDS:FreeStorageSpace:${var.rds-instance-name}"
|
||||
comparison_operator = "LessThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "FreeStorageSpace"
|
||||
period = "3600"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-FreeStorageSpace
|
||||
alarm_description = "RDS:FreeStorageSpace"
|
||||
namespace = "AWS/RDS"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
dimensions = {
|
||||
DBInstanceIdentifier = var.rds-instance-name
|
||||
}
|
||||
@ -44,20 +44,20 @@ resource aws_cloudwatch_metric_alarm rds-storage {
|
||||
}
|
||||
}
|
||||
|
||||
resource aws_cloudwatch_metric_alarm rds-memory {
|
||||
alarm_name = "RDS:FreeableMemory:${var.rds-instance-name}"
|
||||
comparison_operator = "LessThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "FreeableMemory"
|
||||
period = "3600"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-FreeableMemory
|
||||
alarm_description = "RDS:FreeableMemory"
|
||||
namespace = "AWS/RDS"
|
||||
resource "aws_cloudwatch_metric_alarm" "rds-memory" {
|
||||
alarm_name = "${var.cw-alarm-prefix}:RDS:FreeableMemory:${var.rds-instance-name}"
|
||||
comparison_operator = "LessThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "FreeableMemory"
|
||||
period = "3600"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-FreeableMemory
|
||||
alarm_description = "RDS:FreeableMemory"
|
||||
namespace = "AWS/RDS"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
dimensions = {
|
||||
DBInstanceIdentifier = var.rds-instance-name
|
||||
}
|
||||
@ -67,20 +67,20 @@ resource aws_cloudwatch_metric_alarm rds-memory {
|
||||
}
|
||||
}
|
||||
|
||||
resource aws_cloudwatch_metric_alarm rds-DiskQueueDepth {
|
||||
alarm_name = "RDS:DiskQueueDepth:${var.rds-instance-name}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "DiskQueueDepth"
|
||||
period = "300"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-DiskQueueDepth
|
||||
alarm_description = "RDS:DiskQueueDepth"
|
||||
namespace = "AWS/RDS"
|
||||
resource "aws_cloudwatch_metric_alarm" "rds-DiskQueueDepth" {
|
||||
alarm_name = "${var.cw-alarm-prefix}:RDS:DiskQueueDepth:${var.rds-instance-name}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "DiskQueueDepth"
|
||||
period = "300"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-DiskQueueDepth
|
||||
alarm_description = "RDS:DiskQueueDepth"
|
||||
namespace = "AWS/RDS"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
dimensions = {
|
||||
DBInstanceIdentifier = var.rds-instance-name
|
||||
}
|
||||
@ -90,20 +90,20 @@ resource aws_cloudwatch_metric_alarm rds-DiskQueueDepth {
|
||||
}
|
||||
}
|
||||
|
||||
resource aws_cloudwatch_metric_alarm rds-ReadLatency {
|
||||
alarm_name = "RDS:ReadLatency:${var.rds-instance-name}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "2"
|
||||
metric_name = "ReadLatency"
|
||||
period = "900"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-ReadLatency
|
||||
alarm_description = "RDS:ReadLatency"
|
||||
namespace = "AWS/RDS"
|
||||
resource "aws_cloudwatch_metric_alarm" "rds-ReadLatency" {
|
||||
alarm_name = "${var.cw-alarm-prefix}:RDS:ReadLatency:${var.rds-instance-name}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "2"
|
||||
metric_name = "ReadLatency"
|
||||
period = "900"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-ReadLatency
|
||||
alarm_description = "RDS:ReadLatency"
|
||||
namespace = "AWS/RDS"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
dimensions = {
|
||||
DBInstanceIdentifier = var.rds-instance-name
|
||||
}
|
||||
@ -113,20 +113,20 @@ resource aws_cloudwatch_metric_alarm rds-ReadLatency {
|
||||
}
|
||||
}
|
||||
|
||||
resource aws_cloudwatch_metric_alarm rds-WriteLatency {
|
||||
alarm_name = "RDS:WriteLatency:${var.rds-instance-name}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "2"
|
||||
metric_name = "WriteLatency"
|
||||
period = "900"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-WriteLatency
|
||||
alarm_description = "RDS:WriteLatency"
|
||||
namespace = "AWS/RDS"
|
||||
resource "aws_cloudwatch_metric_alarm" "rds-WriteLatency" {
|
||||
alarm_name = "${var.cw-alarm-prefix}:RDS:WriteLatency:${var.rds-instance-name}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "2"
|
||||
metric_name = "WriteLatency"
|
||||
period = "900"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-WriteLatency
|
||||
alarm_description = "RDS:WriteLatency"
|
||||
namespace = "AWS/RDS"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
dimensions = {
|
||||
DBInstanceIdentifier = var.rds-instance-name
|
||||
}
|
||||
|
@ -1,3 +1,5 @@
|
||||
variable cw-alarm-prefix {}
|
||||
variable actions-enabled {}
|
||||
variable rds-instance-name {}
|
||||
variable alarm-actions-urgent {
|
||||
type = string
|
||||
|
@ -1,17 +1,17 @@
|
||||
resource aws_cloudwatch_metric_alarm redis-EngineCPUUtilization {
|
||||
alarm_name = "Redis:EngineCPUUtilization:${var.redis-cluster-id}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "EngineCPUUtilization"
|
||||
period = "3600"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-EngineCPUUtilization
|
||||
alarm_description = "Redis:EngineCPUUtilization"
|
||||
namespace = "AWS/ElastiCache"
|
||||
resource "aws_cloudwatch_metric_alarm" "redis-EngineCPUUtilization" {
|
||||
alarm_name = "${var.cw-alarm-prefix}:Redis:EngineCPUUtilization:${var.redis-cluster-id}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "EngineCPUUtilization"
|
||||
period = "3600"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-EngineCPUUtilization
|
||||
alarm_description = "Redis:EngineCPUUtilization"
|
||||
namespace = "AWS/ElastiCache"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
dimensions = {
|
||||
CacheClusterId = var.redis-cluster-id
|
||||
}
|
||||
@ -21,20 +21,20 @@ resource aws_cloudwatch_metric_alarm redis-EngineCPUUtilization {
|
||||
}
|
||||
}
|
||||
|
||||
resource aws_cloudwatch_metric_alarm redis-DatabaseMemoryUsagePercentage {
|
||||
alarm_name = "Redis:DatabaseMemoryUsagePercentage:${var.redis-cluster-id}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "DatabaseMemoryUsagePercentage"
|
||||
period = "3600"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-DatabaseMemoryUsagePercentage
|
||||
alarm_description = "Redis:DatabaseMemoryUsagePercentage"
|
||||
namespace = "AWS/ElastiCache"
|
||||
resource "aws_cloudwatch_metric_alarm" "redis-DatabaseMemoryUsagePercentage" {
|
||||
alarm_name = "${var.cw-alarm-prefix}:Redis:DatabaseMemoryUsagePercentage:${var.redis-cluster-id}"
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = "1"
|
||||
metric_name = "DatabaseMemoryUsagePercentage"
|
||||
period = "3600"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-DatabaseMemoryUsagePercentage
|
||||
alarm_description = "Redis:DatabaseMemoryUsagePercentage"
|
||||
namespace = "AWS/ElastiCache"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-urgent]
|
||||
ok_actions = [var.alarm-actions-urgent]
|
||||
dimensions = {
|
||||
CacheClusterId = var.redis-cluster-id
|
||||
}
|
||||
@ -44,26 +44,20 @@ resource aws_cloudwatch_metric_alarm redis-DatabaseMemoryUsagePercentage {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
data aws_elasticache_cluster redis-cluster {
|
||||
cluster_id = var.redis-cluster-id
|
||||
}
|
||||
*/
|
||||
resource aws_cloudwatch_metric_alarm redis-CacheHitRate {
|
||||
# for_each = toset(data.aws_elasticache_cluster.redis-cluster.cache_nodes.*.id)
|
||||
alarm_name = "Redis:CacheHitRate:${var.redis-cluster-id}"
|
||||
comparison_operator = "LessThanThreshold"
|
||||
evaluation_periods = "4"
|
||||
metric_name = "CacheHitRate"
|
||||
period = "900"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-CacheHitRate
|
||||
alarm_description = "Redis:CacheHitRate"
|
||||
namespace = "AWS/ElastiCache"
|
||||
resource "aws_cloudwatch_metric_alarm" "redis-CacheHitRate" {
|
||||
alarm_name = "${var.cw-alarm-prefix}:Redis:CacheHitRate:${var.redis-cluster-id}"
|
||||
comparison_operator = "LessThanThreshold"
|
||||
evaluation_periods = "4"
|
||||
metric_name = "CacheHitRate"
|
||||
period = "900"
|
||||
statistic = "Average"
|
||||
threshold = var.threshold-CacheHitRate
|
||||
alarm_description = "Redis:CacheHitRate"
|
||||
namespace = "AWS/ElastiCache"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = "true"
|
||||
alarm_actions = [var.alarm-actions-standard]
|
||||
ok_actions = [var.alarm-actions-standard]
|
||||
actions_enabled = var.actions-enabled
|
||||
alarm_actions = [var.alarm-actions-standard]
|
||||
ok_actions = [var.alarm-actions-standard]
|
||||
dimensions = {
|
||||
CacheClusterId = var.redis-cluster-id
|
||||
# CacheNodeId = each.value
|
||||
|
@ -1,22 +1,24 @@
|
||||
variable redis-cluster-id {}
|
||||
variable alarm-actions-urgent {
|
||||
type = string
|
||||
variable cw-alarm-prefix {}
|
||||
variable "actions-enabled" {}
|
||||
variable "redis-cluster-id" {}
|
||||
variable "alarm-actions-urgent" {
|
||||
type = string
|
||||
default = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-urgent"
|
||||
}
|
||||
variable alarm-actions-emergency {
|
||||
type = string
|
||||
variable "alarm-actions-emergency" {
|
||||
type = string
|
||||
default = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-emergency"
|
||||
}
|
||||
variable alarm-actions-standard {
|
||||
type = string
|
||||
variable "alarm-actions-standard" {
|
||||
type = string
|
||||
default = "arn:aws:sns:ap-east-1:843733946244:rackspace-support-standard"
|
||||
}
|
||||
variable alarm-actions-general {
|
||||
type = string
|
||||
variable "alarm-actions-general" {
|
||||
type = string
|
||||
default = "arn:aws:sns:ap-east-1:843733946244:rackspace-support"
|
||||
}
|
||||
variable default-tags {}
|
||||
variable "default-tags" {}
|
||||
|
||||
variable threshold-EngineCPUUtilization {}
|
||||
variable threshold-DatabaseMemoryUsagePercentage {}
|
||||
variable threshold-CacheHitRate {}
|
||||
variable "threshold-EngineCPUUtilization" {}
|
||||
variable "threshold-DatabaseMemoryUsagePercentage" {}
|
||||
variable "threshold-CacheHitRate" {}
|
@ -1,3 +1,4 @@
|
||||
#!/bin/bash
# Print the ARNs of all application load balancers visible to the current AWS
# credentials as a JSON object: {"result": "<space-separated, sorted ARNs>"}.
#
# Fail on any error in the pipeline. Without this the exit status is that of
# the last command (xargs), so an AWS CLI failure would be reported as a
# successful, empty result.
set -euo pipefail

# tr splits the tab-separated text output into lines; unlike `sed 's/\t/\n/g'`
# it does not depend on GNU sed escape handling.
RESULTS=$(aws elbv2 describe-load-balancers \
  --query 'LoadBalancers[?Type==`application`].LoadBalancerArn' \
  --output text --no-cli-pager \
  | tr '\t' '\n' | sort | xargs)

jq -n --arg result "$RESULTS" '{"result":$result}'
|
||||
|
||||
|
6
modules/util/resource-list/list-asg.sh
Executable file
6
modules/util/resource-list/list-asg.sh
Executable file
@ -0,0 +1,6 @@
|
||||
#!/bin/bash
# Print the names of all Auto Scaling groups visible to the current AWS
# credentials as a JSON object: {"result": "<space-separated, sorted names>"}.
#
# Fail on any error in the pipeline. Without this the exit status is that of
# the last command (xargs), so an AWS CLI failure would be reported as a
# successful, empty result.
set -euo pipefail

# tr splits the tab-separated text output into lines; unlike `sed 's/\t/\n/g'`
# it does not depend on GNU sed escape handling.
RESULTS=$(aws autoscaling describe-auto-scaling-groups \
  --query 'AutoScalingGroups[*].AutoScalingGroupName' \
  --output text --no-cli-pager \
  | tr '\t' '\n' | sort | xargs)

jq -n --arg result "$RESULTS" '{"result":$result}'
|
||||
|
||||
|
6
modules/util/resource-list/list-kafka.sh
Executable file
6
modules/util/resource-list/list-kafka.sh
Executable file
@ -0,0 +1,6 @@
|
||||
#!/bin/bash
# Print the names of all Kafka clusters returned by `aws kafka list-clusters`
# as a JSON object: {"result": "<space-separated, sorted names>"}.
#
# Fail on any error in the pipeline. Without this the exit status is that of
# the last command (xargs), so an AWS CLI failure would be reported as a
# successful, empty result.
set -euo pipefail

# The JMESPath query is single-quoted so the shell cannot glob-expand `[*]`
# against files in the working directory.
# tr splits the tab-separated text output into lines; unlike `sed 's/\t/\n/g'`
# it does not depend on GNU sed escape handling.
RESULTS=$(aws kafka list-clusters \
  --query 'ClusterInfoList[*].ClusterName' \
  --output text --no-cli-pager \
  | tr '\t' '\n' | sort | xargs)

jq -n --arg result "$RESULTS" '{"result":$result}'
|
||||
|
||||
|
6
modules/util/resource-list/list-opensearch.sh
Executable file
6
modules/util/resource-list/list-opensearch.sh
Executable file
@ -0,0 +1,6 @@
|
||||
#!/bin/bash
# Print the names of all OpenSearch domains returned by
# `aws opensearch list-domain-names` as a JSON object:
# {"result": "<space-separated, sorted names>"}.
#
# Fail on any error in the pipeline. Without this the exit status is that of
# the last command (xargs), so an AWS CLI failure would be reported as a
# successful, empty result.
set -euo pipefail

# The JMESPath query is single-quoted so the shell cannot glob-expand `[*]`
# against files in the working directory.
# tr splits the tab-separated text output into lines; unlike `sed 's/\t/\n/g'`
# it does not depend on GNU sed escape handling.
RESULTS=$(aws opensearch list-domain-names \
  --query 'DomainNames[*].DomainName' \
  --output text --no-cli-pager \
  | tr '\t' '\n' | sort | xargs)

jq -n --arg result "$RESULTS" '{"result":$result}'
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user