NEW: TGW and NGW monitoring

This commit is contained in:
xpk 2022-12-08 10:42:05 +08:00
parent a9e77c22da
commit 4d61d0943e
Signed by: xpk
GPG Key ID: CD4FF6793F09AB86
10 changed files with 179 additions and 0 deletions

View File

@ -0,0 +1,26 @@
# Monitoring module
This module deploys the default CloudWatch metric alarms for a NAT gateway (NGW).
## Notes
The Terraform `lifecycle` block ignores changes to tags in order to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
## Example
```terraform
module "ngw" {
source = "../../modules/util/resource-list"
resource-type = "ngw"
}
module "ngw-monitoring" {
cw-alarm-prefix = local.cw-alarm-prefix
for_each = module.ngw.result-set
source = "../../modules/ManagementGovernance/Monitoring.NGW"
default-tags = local.default-tags
res-id = each.value
threshold-ErrorPortAllocation = 2
threshold-ConnectionEstablishedCount = 1000
threshold-PacketsDropCount = 10
actions-enabled = var.actions-enabled
sns-targets = var.sns-targets
}
```

View File

@ -0,0 +1,68 @@
# Alarm on the AWS/NATGateway "ErrorPortAllocation" metric of one NAT gateway.
# Goes into ALARM when the 300-second average is greater than
# var.threshold-ErrorPortAllocation for 2 consecutive periods. ALARM and OK
# transitions are sent to the "alarm-actions-urgent" entry of var.sns-targets.
# NOTE(review): AWS documentation describes Sum as the most useful statistic
# for NAT gateway count metrics - confirm that Average is intended here.
resource "aws_cloudwatch_metric_alarm" "ngw-ErrorPortAllocation" {
  alarm_name        = "${var.cw-alarm-prefix}:NGW:ErrorPortAllocation:${var.res-id}"
  alarm_description = "NGW:ErrorPortAllocation"

  # Metric selection
  namespace   = "AWS/NATGateway"
  metric_name = "ErrorPortAllocation"
  dimensions = {
    NatGatewayId = var.res-id
  }

  # Evaluation (numeric arguments are given as numbers, not quoted strings)
  statistic           = "Average"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-ErrorPortAllocation

  # Notifications; no action is taken on INSUFFICIENT_DATA
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags
  lifecycle {
    # Tags are set on creation only; later tag changes are not applied.
    ignore_changes = [tags]
  }
}
# Alarm on the AWS/NATGateway "ConnectionEstablishedCount" metric of one NAT
# gateway. Goes into ALARM when the 300-second average is greater than
# var.threshold-ConnectionEstablishedCount for 2 consecutive periods. ALARM and
# OK transitions are sent to the "alarm-actions-standard" entry of
# var.sns-targets.
# NOTE(review): AWS documentation describes Sum as the most useful statistic
# for NAT gateway count metrics - confirm that Average is intended here.
resource "aws_cloudwatch_metric_alarm" "ngw-ConnectionEstablishedCount" {
  alarm_name        = "${var.cw-alarm-prefix}:NGW:ConnectionEstablishedCount:${var.res-id}"
  alarm_description = "NGW:ConnectionEstablishedCount"

  # Metric selection
  namespace   = "AWS/NATGateway"
  metric_name = "ConnectionEstablishedCount"
  dimensions = {
    NatGatewayId = var.res-id
  }

  # Evaluation (numeric arguments are given as numbers, not quoted strings)
  statistic           = "Average"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-ConnectionEstablishedCount

  # Notifications; no action is taken on INSUFFICIENT_DATA
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-standard]
  ok_actions                = [var.sns-targets.alarm-actions-standard]
  insufficient_data_actions = []

  tags = var.default-tags
  lifecycle {
    # Tags are set on creation only; later tag changes are not applied.
    ignore_changes = [tags]
  }
}
# Alarm on the AWS/NATGateway "PacketsDropCount" metric of one NAT gateway.
# Goes into ALARM when the 300-second average is greater than
# var.threshold-PacketsDropCount for 2 consecutive periods. ALARM and OK
# transitions are sent to the "alarm-actions-standard" entry of var.sns-targets.
# NOTE(review): AWS documentation describes Sum as the most useful statistic
# for NAT gateway count metrics - confirm that Average is intended here.
resource "aws_cloudwatch_metric_alarm" "ngw-PacketsDropCount" {
  alarm_name        = "${var.cw-alarm-prefix}:NGW:PacketsDropCount:${var.res-id}"
  alarm_description = "NGW:PacketsDropCount"

  # Metric selection
  namespace   = "AWS/NATGateway"
  metric_name = "PacketsDropCount"
  dimensions = {
    NatGatewayId = var.res-id
  }

  # Evaluation (numeric arguments are given as numbers, not quoted strings)
  statistic           = "Average"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-PacketsDropCount

  # Notifications; no action is taken on INSUFFICIENT_DATA
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-standard]
  ok_actions                = [var.sns-targets.alarm-actions-standard]
  insufficient_data_actions = []

  tags = var.default-tags
  lifecycle {
    # Tags are set on creation only; later tag changes are not applied.
    ignore_changes = [tags]
  }
}

View File

@ -0,0 +1,9 @@
# Version requirements for this module.
terraform {
  required_providers {
    # AWS provider, restricted to patch releases of 4.36 (>= 4.36.1, < 4.37.0).
    aws = {
      version = "~> 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  # Terraform CLI, restricted to the 1.3.x series.
  required_version = "~> 1.3.0"
}

View File

@ -0,0 +1,8 @@
# Input variables of the NAT gateway monitoring module.
# Type constraints mirror what the consuming alarm arguments already require.

variable "cw-alarm-prefix" {
  description = "Text placed at the start of every CloudWatch alarm name created by this module."
  type        = string
}

variable "actions-enabled" {
  description = "Passed to actions_enabled on every alarm: whether alarm state changes execute actions."
  type        = bool
}

variable "res-id" {
  description = "NAT gateway ID; used as the NatGatewayId metric dimension and as the alarm name suffix."
  type        = string
}

variable "threshold-ErrorPortAllocation" {
  description = "Alarm threshold for the ErrorPortAllocation metric (alarm when the value is greater than this)."
  type        = number
}

variable "threshold-ConnectionEstablishedCount" {
  description = "Alarm threshold for the ConnectionEstablishedCount metric (alarm when the value is greater than this)."
  type        = number
}

variable "threshold-PacketsDropCount" {
  description = "Alarm threshold for the PacketsDropCount metric (alarm when the value is greater than this)."
  type        = number
}

# Left untyped on purpose: only the two attributes named below are read, and
# callers may pass an object or a map with additional entries.
variable "sns-targets" {
  description = "Alarm action targets. Must provide the attributes alarm-actions-urgent and alarm-actions-standard."
}

variable "default-tags" {
  description = "Tags applied to every alarm when it is created."
  type        = map(string)
}

View File

@ -0,0 +1,24 @@
# Monitoring module
This module deploys the default CloudWatch metric alarms for a transit gateway (TGW).
## Notes
The Terraform `lifecycle` block ignores changes to tags in order to speed up subsequent Terraform updates. CloudWatch alarm tags cannot be viewed in the AWS console anyway.
## Example
```terraform
module "tgw" {
source = "../../modules/util/resource-list"
resource-type = "tgw"
}
module "tgw-monitoring" {
cw-alarm-prefix = local.cw-alarm-prefix
for_each = module.tgw.result-set
source = "../../modules/ManagementGovernance/Monitoring.TGW"
default-tags = local.default-tags
tgw-id = each.value
threshold-PacketDropCountNoRoute = 1
actions-enabled = var.actions-enabled
sns-targets = var.sns-targets
}
```

View File

@ -0,0 +1,22 @@
# Alarm on the AWS/TransitGateway "PacketDropCountNoRoute" metric of one
# transit gateway. Goes into ALARM when the 300-second average is greater than
# var.threshold-PacketDropCountNoRoute for 2 consecutive periods. ALARM and OK
# transitions are sent to the "alarm-actions-standard" entry of var.sns-targets.
resource "aws_cloudwatch_metric_alarm" "tgw-PacketDropCountNoRoute" {
  alarm_name        = "${var.cw-alarm-prefix}:TGW:PacketDropCountNoRoute:${var.tgw-id}"
  alarm_description = "TGW:PacketDropCountNoRoute"

  # Metric selection
  namespace   = "AWS/TransitGateway"
  metric_name = "PacketDropCountNoRoute"
  dimensions = {
    TransitGateway = var.tgw-id
  }

  # Evaluation (numeric arguments are given as numbers, not quoted strings)
  statistic           = "Average"
  period              = 300
  evaluation_periods  = 2
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-PacketDropCountNoRoute

  # Notifications; no action is taken on INSUFFICIENT_DATA
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-standard]
  ok_actions                = [var.sns-targets.alarm-actions-standard]
  insufficient_data_actions = []

  tags = var.default-tags
  lifecycle {
    # Tags are set on creation only; later tag changes are not applied.
    ignore_changes = [tags]
  }
}

View File

@ -0,0 +1,9 @@
# Version requirements for this module.
terraform {
  required_providers {
    # AWS provider, restricted to patch releases of 4.36 (>= 4.36.1, < 4.37.0).
    aws = {
      version = "~> 4.36.1"
      source  = "hashicorp/aws"
    }
  }

  # Terraform CLI, restricted to the 1.3.x series.
  required_version = "~> 1.3.0"
}

View File

@ -0,0 +1,7 @@
# Input variables of the transit gateway monitoring module.
# Type constraints mirror what the consuming alarm arguments already require.

variable "cw-alarm-prefix" {
  description = "Text placed at the start of every CloudWatch alarm name created by this module."
  type        = string
}

variable "actions-enabled" {
  description = "Passed to actions_enabled on the alarm: whether alarm state changes execute actions."
  type        = bool
}

variable "tgw-id" {
  description = "Transit gateway ID; used as the TransitGateway metric dimension and as the alarm name suffix."
  type        = string
}

variable "threshold-PacketDropCountNoRoute" {
  description = "Alarm threshold for the PacketDropCountNoRoute metric (alarm when the value is greater than this)."
  type        = number
}

# Left untyped on purpose: only the attribute named below is read, and callers
# may pass an object or a map with additional entries.
variable "sns-targets" {
  description = "Alarm action targets. Must provide the attribute alarm-actions-standard."
}

variable "default-tags" {
  description = "Tags applied to the alarm when it is created."
  type        = map(string)
}

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Prints a JSON object {"result": "<ids>"} where <ids> is the sorted,
# space-separated list of NAT gateway IDs returned by the AWS CLI for the
# current credentials/region.
# NOTE(review): presumably consumed by the resource-list Terraform module -
# confirm against the caller.
#
# Abort on any failure (including inside the pipeline) so that a failed AWS
# call is reported as an error instead of being emitted as an empty result.
set -euo pipefail

# --output text separates the IDs with tabs; put one ID per line for sort,
# then let xargs join them back into a single space-separated line.
RESULTS=$(aws ec2 describe-nat-gateways \
  --query 'NatGateways[].NatGatewayId' \
  --output text --no-cli-pager \
  | tr '\t' '\n' | sort | xargs)

jq -n --arg result "$RESULTS" '{"result":$result}'

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Prints a JSON object {"result": "<ids>"} where <ids> is the sorted,
# space-separated list of transit gateway IDs returned by the AWS CLI for the
# current credentials/region.
# NOTE(review): presumably consumed by the resource-list Terraform module -
# confirm against the caller.
#
# Abort on any failure (including inside the pipeline) so that a failed AWS
# call is reported as an error instead of being emitted as an empty result.
set -euo pipefail

# --output text separates the IDs with tabs; put one ID per line for sort,
# then let xargs join them back into a single space-separated line.
RESULTS=$(aws ec2 describe-transit-gateways \
  --query 'TransitGateways[].TransitGatewayId' \
  --output text --no-cli-pager \
  | tr '\t' '\n' | sort | xargs)

jq -n --arg result "$RESULTS" '{"result":$result}'