UPD: various updates on cloudwatch monitoring from upstream
This commit is contained in:
parent
436b799ff1
commit
1fe92a3f78
@ -43,14 +43,18 @@ No modules.
|
||||
| cwl-region | AWS region where the CloudWatch Logs log group resides. Needed to set up cwlog-stream-role | `string` | n/a | yes |
|
||||
| dest-bucket-arn | Destination S3 bucket ARN | `string` | n/a | yes |
|
||||
| dest-bucket-kmskey-arn | KMS key ARN for destination bucket | `string` | n/a | yes |
|
||||
| dest-bucket-prefix | S3 object prefix for this stream | `string` | n/a | yes |
|
||||
| dest-bucket-prefix | S3 object prefix for this stream. The prefix must not start with a / and must end with a /. For example, r53-log/acme.local/ | `string` | n/a | yes |
|
||||
| enable-firehose-errorlog | Enable firehose errorlog | `bool` | `false` | no |
|
||||
| firehose-kmskey-arn | KMS Key arn for Firehose | `string` | n/a | yes |
|
||||
| source-cwlgroup-name | Name of the source CloudWatch Logs log group | `string` | n/a | yes |
|
||||
| stream-name | Name of Kinesis Data Firehose delivery stream | `string` | n/a | yes |
|
||||
|
||||
## Outputs
|
||||
|
||||
No outputs.
|
||||
| Name | Description |
|
||||
|------|-------------|
|
||||
| cloudwatchstream-iam-role-arn | n/a |
|
||||
| firehose-iam-role-arn | n/a |
|
||||
|
||||
---
|
||||
## Authorship
|
||||
|
7
modules/ManagementGovernance/Cwl-firehose-s3/outputs.tf
Normal file
7
modules/ManagementGovernance/Cwl-firehose-s3/outputs.tf
Normal file
@ -0,0 +1,7 @@
|
||||
output firehose-iam-role-arn {
|
||||
value = aws_iam_role.firehose-stream-iam-role.arn
|
||||
}
|
||||
|
||||
output cloudwatchstream-iam-role-arn {
|
||||
value = aws_iam_role.cwlog-stream-role.arn
|
||||
}
|
@ -29,3 +29,46 @@ module "ec2-monitoring" {
|
||||
sns-targets = var.sns-targets
|
||||
}
|
||||
```
|
||||
|
||||
## Sample cloudwatch alarm email notification
|
||||
```
|
||||
Subject: ALARM: "TestAlarmPleaseIgnore" in Asia Pacific (Hong Kong)
|
||||
|
||||
You are receiving this email because your Amazon CloudWatch Alarm "TestAlarmPleaseIgnore" in the
|
||||
Asia Pacific (Hong Kong) region has entered the ALARM state, because "Threshold Crossed: 1 out of
|
||||
the last 1 datapoints [864.0 (24/01/24 00:56:00)] was less than or equal to the threshold (900.0)
|
||||
(minimum 1 datapoint for OK -> ALARM transition)." at "Wednesday 24 January, 2024 01:01:34 UTC".
|
||||
|
||||
View this alarm in the AWS Management Console:
|
||||
https://ap-east-1.console.aws.amazon.com/cloudwatch...
|
||||
|
||||
Alarm Details:
|
||||
- Name: TestAlarmPleaseIgnore
|
||||
- Description: Cloudwatch alarm for the following resource
|
||||
- Instance ID: xxx
|
||||
- Instance Name: yyy
|
||||
- Instance IP: zz.zz.zz.zz
|
||||
- State Change: OK -> ALARM
|
||||
- Reason for State Change: Threshold Crossed: 1 out of the last 1 datapoints [864.0 (24/01/24 00:56:00)] was less than or equal to the threshold (900.0) (minimum 1 datapoint for OK -> ALARM transition).
|
||||
- Timestamp: Wednesday 24 January, 2024 01:01:34 UTC
|
||||
- AWS Account: 111122223333
|
||||
- Alarm Arn: arn:aws:cloudwatch:ap-east-1:111122223333:alarm:TestAlarmPleaseIgnore
|
||||
|
||||
Threshold:
|
||||
- The alarm is in the ALARM state when the metric is LessThanOrEqualToThreshold 900.0 for at least 1 of the last 1 period(s) of 300 seconds.
|
||||
|
||||
Monitored Metric:
|
||||
- MetricNamespace: AWS/EC2
|
||||
- MetricName: CPUCreditBalance
|
||||
- Dimensions: [InstanceId = i-050d4adeafaa53cd0]
|
||||
- Period: 300 seconds
|
||||
- Statistic: Average
|
||||
- Unit: not specified
|
||||
- TreatMissingData: missing
|
||||
|
||||
|
||||
State Change Actions:
|
||||
- OK:
|
||||
- ALARM: [arn:aws:sns:ap-east-1:111122223333:CWA-SNS-Email-KenFong]
|
||||
- INSUFFICIENT_DATA:
|
||||
```
|
@ -1,3 +1,14 @@
|
||||
locals {
|
||||
# alarm-message is used as alarm_description, which CloudWatch limits to 1024 characters
|
||||
alarm-message = <<EOF
|
||||
Cloudwatch alarm for the following resource
|
||||
- Instance ID: ${var.ec2-instance-id}
|
||||
- Instance Name: ${data.aws_instance.ec2-instance.tags["Name"]}
|
||||
- Instance IP: ${data.aws_instance.ec2-instance.private_ip}
|
||||
- Instance Type: ${data.aws_instance.ec2-instance.instance_type}
|
||||
EOF
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_System" {
|
||||
alarm_name = "${var.settings.StatusCheckFailed_System.ecccode}-EC2_${var.ec2-instance-id}-StatusCheckFailed_System"
|
||||
comparison_operator = var.settings.StatusCheckFailed_System.comparison_operator
|
||||
@ -6,7 +17,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_System" {
|
||||
period = var.settings.StatusCheckFailed_System.period
|
||||
statistic = var.settings.StatusCheckFailed_System.statistic
|
||||
threshold = var.settings.StatusCheckFailed_System.threshold
|
||||
alarm_description = "EC2:StatusCheckFailed_System"
|
||||
# alarm_description = "EC2:StatusCheckFailed_System"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "AWS/EC2"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
@ -25,7 +37,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-StatusCheckFailed_Instance" {
|
||||
period = var.settings.StatusCheckFailed_Instance.period
|
||||
statistic = var.settings.StatusCheckFailed_Instance.statistic
|
||||
threshold = var.settings.StatusCheckFailed_Instance.threshold
|
||||
alarm_description = "EC2:StatusCheckFailed_Instance"
|
||||
# alarm_description = "EC2:StatusCheckFailed_Instance"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "AWS/EC2"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
@ -44,7 +57,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-CPUUtilization" {
|
||||
period = var.settings.CPUUtilization.period
|
||||
statistic = var.settings.CPUUtilization.statistic
|
||||
threshold = var.settings.CPUUtilization.threshold
|
||||
alarm_description = "EC2:CPUUtilization"
|
||||
# alarm_description = "EC2:CPUUtilization"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "AWS/EC2"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
@ -61,6 +75,12 @@ data "aws_instance" "ec2-instance" {
|
||||
instance_id = var.ec2-instance-id
|
||||
}
|
||||
|
||||
# put instance name or ip in alarm name
|
||||
locals {
|
||||
instance-ip = data.aws_instance.ec2-instance.private_ip
|
||||
instance-name = data.aws_instance.ec2-instance.tags["Name"]
|
||||
}
|
||||
|
||||
module "ec2_os" {
|
||||
source = "../../util/awscli"
|
||||
access_key = var.target-account-ak
|
||||
@ -90,7 +110,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-mem_used_percent" {
|
||||
period = var.settings.mem_used_percent.period
|
||||
statistic = var.settings.mem_used_percent.statistic
|
||||
threshold = var.settings.mem_used_percent.threshold
|
||||
alarm_description = "EC2:mem_used_percent"
|
||||
# alarm_description = "EC2:mem_used_percent"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
@ -132,7 +153,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-swap_used_percent" {
|
||||
period = var.settings.swap_used_percent.period
|
||||
statistic = var.settings.swap_used_percent.statistic
|
||||
threshold = var.settings.swap_used_percent.threshold
|
||||
alarm_description = "EC2:swap_used_percent"
|
||||
# alarm_description = "EC2:swap_used_percent"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
@ -154,7 +176,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-disk_used_percent_warn" {
|
||||
period = var.settings.disk_used_percent_warn.period
|
||||
statistic = var.settings.disk_used_percent_warn.statistic
|
||||
threshold = var.settings.disk_used_percent_warn.threshold
|
||||
alarm_description = "EC2:disk_used_percent"
|
||||
# alarm_description = "EC2:disk_used_percent"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
@ -172,7 +195,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-disk_used_percent_crit" {
|
||||
period = var.settings.disk_used_percent_crit.period
|
||||
statistic = var.settings.disk_used_percent_crit.statistic
|
||||
threshold = var.settings.disk_used_percent_crit.threshold
|
||||
alarm_description = "EC2:disk_used_percent"
|
||||
# alarm_description = "EC2:disk_used_percent"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
@ -190,7 +214,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-disk_inodes_free" {
|
||||
period = var.settings.disk_inodes_free.period
|
||||
statistic = var.settings.disk_inodes_free.statistic
|
||||
threshold = var.settings.disk_inodes_free.threshold
|
||||
alarm_description = "EC2:disk_inodes_free"
|
||||
# alarm_description = "EC2:disk_inodes_free"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
@ -209,7 +234,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-processes_total" {
|
||||
period = var.settings.processes_total.period
|
||||
statistic = var.settings.processes_total.statistic
|
||||
threshold = var.settings.processes_total.threshold
|
||||
alarm_description = "EC2:processes_total"
|
||||
# alarm_description = "EC2:processes_total"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
@ -228,7 +254,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-net_err" {
|
||||
comparison_operator = "GreaterThanThreshold"
|
||||
evaluation_periods = var.settings.net_err_in.evaluation_periods
|
||||
threshold = 0
|
||||
alarm_description = "EC2:net_err_in or EC2:net_err_out exceeds threshold"
|
||||
# alarm_description = "EC2:net_err_in or EC2:net_err_out exceeds threshold"
|
||||
alarm_description = local.alarm-message
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = false
|
||||
alarm_actions = [var.settings.net_err_in.action]
|
||||
@ -276,7 +303,7 @@ resource "aws_cloudwatch_metric_alarm" "ec2-net_err" {
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-NetworkIn" {
|
||||
count = try(var.settings.NetworkIn.monitor,false) ? 1 : 0
|
||||
count = try(var.settings.NetworkIn.monitor, false) ? 1 : 0
|
||||
alarm_name = "${var.settings.NetworkIn.ecccode}-EC2_${var.ec2-instance-id}-NetworkIn"
|
||||
comparison_operator = var.settings.NetworkIn.comparison_operator
|
||||
evaluation_periods = var.settings.NetworkIn.evaluation_periods
|
||||
@ -284,7 +311,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-NetworkIn" {
|
||||
period = var.settings.NetworkIn.period
|
||||
statistic = var.settings.NetworkIn.statistic
|
||||
threshold = var.settings.NetworkIn.threshold
|
||||
alarm_description = "EC2:NetworkIn"
|
||||
# alarm_description = "EC2:NetworkIn"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "AWS/EC2"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
@ -296,7 +324,7 @@ resource "aws_cloudwatch_metric_alarm" "ec2-NetworkIn" {
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_metric_alarm" "ec2-NetworkOut" {
|
||||
count = try(var.settings.NetworkIn.monitor,false) ? 1 : 0
|
||||
count = try(var.settings.NetworkIn.monitor, false) ? 1 : 0
|
||||
alarm_name = "${var.settings.NetworkOut.ecccode}-EC2_${var.ec2-instance-id}-NetworkOut"
|
||||
comparison_operator = var.settings.NetworkOut.comparison_operator
|
||||
evaluation_periods = var.settings.NetworkOut.evaluation_periods
|
||||
@ -304,7 +332,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-NetworkOut" {
|
||||
period = var.settings.NetworkOut.period
|
||||
statistic = var.settings.NetworkOut.statistic
|
||||
threshold = var.settings.NetworkOut.threshold
|
||||
alarm_description = "EC2:NetworkOut"
|
||||
# alarm_description = "EC2:NetworkOut"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "AWS/EC2"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
@ -325,7 +354,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-MemoryCommittedPct" {
|
||||
period = var.settings.MemoryCommittedPct.period
|
||||
statistic = var.settings.MemoryCommittedPct.statistic
|
||||
threshold = var.settings.MemoryCommittedPct.threshold
|
||||
alarm_description = "EC2:MemoryCommittedBytes"
|
||||
# alarm_description = "EC2:MemoryCommittedBytes"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
@ -348,7 +378,8 @@ resource "aws_cloudwatch_metric_alarm" "ec2-LogicalDiskFreePct" {
|
||||
period = var.settings.LogicalDiskFreePct.period
|
||||
statistic = var.settings.LogicalDiskFreePct.statistic
|
||||
threshold = var.settings.LogicalDiskFreePct.threshold
|
||||
alarm_description = "EC2:OsDiskFreePct"
|
||||
# alarm_description = "EC2:OsDiskFreePct"
|
||||
alarm_description = local.alarm-message
|
||||
namespace = "CWAgent"
|
||||
insufficient_data_actions = []
|
||||
actions_enabled = var.actions-enabled
|
||||
|
Loading…
Reference in New Issue
Block a user