314 lines
13 KiB
HCL
314 lines
13 KiB
HCL
# Looks up the identity of the credentials Terraform is running with.
# Its `id` attribute is used as the ClientId dimension of the alarms in this file.
data "aws_caller_identity" "this" {
}
|
||
|
||
# Alarm on the CPUUtilization metric of the domain.
# Compares the Average statistic against var.threshold-CPUUtilization,
# evaluated over 3 periods of 1800 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-CPUUtilization" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:CPUUtilization:${var.domain-name}"
  alarm_description = "ES:CPUUtilization"

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "CPUUtilization"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings.
  statistic           = "Average"
  period              = "1800"
  evaluation_periods  = "3"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-CPUUtilization

  # Notifications: the same "urgent" target receives both ALARM and OK
  # transitions (presumably an SNS topic ARN - confirm against var.sns-targets).
  # Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||
|
||
# Alarm on the SearchLatency metric of the domain.
# Compares the Average statistic against var.threshold-SearchLatency,
# evaluated over 3 periods of 1800 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-SearchLatency" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:SearchLatency:${var.domain-name}"
  alarm_description = "ES:SearchLatency"

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "SearchLatency"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings.
  statistic           = "Average"
  period              = "1800"
  evaluation_periods  = "3"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-SearchLatency

  # Notifications: the same "urgent" target receives both ALARM and OK
  # transitions (presumably an SNS topic ARN - confirm against var.sns-targets).
  # Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||
|
||
# Alarm on the IndexingLatency metric of the domain.
# Compares the Average statistic against var.threshold-IndexingLatency,
# evaluated over 3 periods of 1800 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-IndexingLatency" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:IndexingLatency:${var.domain-name}"
  alarm_description = "ES:IndexingLatency"

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "IndexingLatency"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings.
  statistic           = "Average"
  period              = "1800"
  evaluation_periods  = "3"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-IndexingLatency

  # Notifications: the same "urgent" target receives both ALARM and OK
  # transitions (presumably an SNS topic ARN - confirm against var.sns-targets).
  # Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||
|
||
# Alarm on the ClusterStatus.red metric of the domain.
# Compares the Maximum statistic against a fixed threshold of 0 (i.e. any
# value above zero breaches), evaluated over 2 periods of 900 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-ClusterStatusRed" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:ClusterStatusRed:${var.domain-name}"
  alarm_description = "At least one primary shard and its replicas aren't allocated to a node."

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "ClusterStatus.red"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings. The threshold is hard-coded rather than variable-driven.
  statistic           = "Maximum"
  period              = "900"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0

  # Notifications: the same "urgent" target receives both ALARM and OK
  # transitions (presumably an SNS topic ARN - confirm against var.sns-targets).
  # Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||
|
||
# Alarm on the ThreadpoolWriteQueue metric of the domain.
# Compares the Average statistic against var.threshold-ThreadpoolWriteQueue,
# evaluated over 2 periods of 60 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-ThreadpoolWriteQueue" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:ThreadpoolWriteQueue:${var.domain-name}"
  alarm_description = "The cluster is experiencing high indexing concurrency. Review and control indexing requests, or increase cluster resources."

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "ThreadpoolWriteQueue"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings.
  statistic           = "Average"
  period              = "60"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-ThreadpoolWriteQueue

  # Notifications: this alarm uses the "standard" target (not "urgent") for
  # both ALARM and OK transitions (presumably an SNS topic ARN - confirm
  # against var.sns-targets). Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-standard]
  ok_actions                = [var.sns-targets.alarm-actions-standard]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||
|
||
# Alarm on the ThreadpoolSearchQueue metric of the domain.
# Compares the Average statistic against var.threshold-ThreadpoolSearchQueue,
# evaluated over 2 periods of 60 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-ThreadpoolSearchQueue" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:ThreadpoolSearchQueue:${var.domain-name}"
  alarm_description = "The cluster is experiencing high search concurrency. Consider scaling your cluster. You can also increase the search queue size, but increasing it excessively can cause out of memory errors."

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "ThreadpoolSearchQueue"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings.
  statistic           = "Average"
  period              = "60"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-ThreadpoolSearchQueue

  # Notifications: this alarm uses the "standard" target (not "urgent") for
  # both ALARM and OK transitions (presumably an SNS topic ARN - confirm
  # against var.sns-targets). Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-standard]
  ok_actions                = [var.sns-targets.alarm-actions-standard]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||
|
||
# Alarm on the ThreadpoolSearchRejected metric of the domain.
# Compares the Average statistic against var.threshold-ThreadpoolSearchRejected,
# evaluated over 2 periods of 60 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-ThreadpoolSearchRejected" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:ThreadpoolSearchRejected:${var.domain-name}"
  alarm_description = "These alarms notify you of domain issues that might impact performance and stability."

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "ThreadpoolSearchRejected"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings.
  statistic           = "Average"
  period              = "60"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-ThreadpoolSearchRejected

  # Notifications: the same "urgent" target receives both ALARM and OK
  # transitions (presumably an SNS topic ARN - confirm against var.sns-targets).
  # Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||
|
||
# Alarm on the ThreadpoolWriteRejected metric of the domain.
# Compares the Average statistic against var.threshold-ThreadpoolWriteRejected,
# evaluated over 2 periods of 60 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-ThreadpoolWriteRejected" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:ThreadpoolWriteRejected:${var.domain-name}"
  alarm_description = "These alarms notify you of domain issues that might impact performance and stability."

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "ThreadpoolWriteRejected"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings.
  statistic           = "Average"
  period              = "60"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-ThreadpoolWriteRejected

  # Notifications: the same "urgent" target receives both ALARM and OK
  # transitions (presumably an SNS topic ARN - confirm against var.sns-targets).
  # Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||
|
||
# Alarm on the MasterCPUUtilization metric of the domain.
# Compares the Average statistic against var.threshold-MasterCPUUtilization,
# evaluated over 2 periods of 300 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-MasterCPUUtilization" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:MasterCPUUtilization:${var.domain-name}"
  alarm_description = "MasterCPUUtilization"

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "MasterCPUUtilization"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings.
  statistic           = "Average"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-MasterCPUUtilization

  # Notifications: the same "urgent" target receives both ALARM and OK
  # transitions (presumably an SNS topic ARN - confirm against var.sns-targets).
  # Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||
|
||
# Alarm on the MasterJVMMemoryPressure metric of the domain.
# Compares the Average statistic against var.threshold-MasterJVMMemoryPressure,
# evaluated over 2 periods of 60 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-MasterJVMMemoryPressure" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:MasterJVMMemoryPressure:${var.domain-name}"
  alarm_description = "MasterJVMMemoryPressure"

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "MasterJVMMemoryPressure"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings.
  statistic           = "Average"
  period              = "60"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-MasterJVMMemoryPressure

  # Notifications: the same "urgent" target receives both ALARM and OK
  # transitions (presumably an SNS topic ARN - confirm against var.sns-targets).
  # Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||
|
||
# Alarm on the JVMMemoryPressure metric of the domain.
# Compares the Average statistic against var.threshold-JVMMemoryPressure,
# evaluated over 2 periods of 60 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-JVMMemoryPressure" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:JVMMemoryPressure:${var.domain-name}"
  alarm_description = "The cluster could encounter out of memory errors if usage increases. Consider scaling vertically. OpenSearch Service uses half of an instance's RAM for the Java heap, up to a heap size of 32 GiB. You can scale instances vertically up to 64 GiB of RAM, at which point you can scale horizontally by adding instances."

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "JVMMemoryPressure"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings.
  statistic           = "Average"
  period              = "60"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-JVMMemoryPressure

  # Notifications: the same "urgent" target receives both ALARM and OK
  # transitions (presumably an SNS topic ARN - confirm against var.sns-targets).
  # Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||
|
||
# Alarm on the ClusterIndexWritesBlocked metric of the domain.
# Compares the Average statistic against var.threshold-ClusterIndexWritesBlocked,
# evaluated over 2 periods of 60 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-ClusterIndexWritesBlocked" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:ClusterIndexWritesBlocked:${var.domain-name}"
  alarm_description = "Your cluster is blocking write requests. See ClusterBlockException."

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "ClusterIndexWritesBlocked"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings.
  statistic           = "Average"
  period              = "60"
  evaluation_periods  = "2"
  comparison_operator = "GreaterThanThreshold"
  threshold           = var.threshold-ClusterIndexWritesBlocked

  # Notifications: the same "urgent" target receives both ALARM and OK
  # transitions (presumably an SNS topic ARN - confirm against var.sns-targets).
  # Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|
||
|
||
# Alarm on the FreeStorageSpace metric of the domain.
# Fires when free storage drops BELOW var.threshold-FreeStorageSpace (this is
# the only alarm in this file using LessThanThreshold), evaluated over
# 2 periods of 300 seconds each.
resource "aws_cloudwatch_metric_alarm" "ES-FreeStorageSpace" {
  alarm_name        = "${var.cw-alarm-prefix}:ES:FreeStorageSpace:${var.domain-name}"
  alarm_description = "A node in your cluster is low on free storage space."

  # Metric selection: AWS/ES namespace, scoped by domain name and ClientId.
  namespace   = "AWS/ES"
  metric_name = "FreeStorageSpace"

  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  # Evaluation settings.
  # "Minimum" tracks the data node with the least free space, which is what the
  # alarm description promises ("A node ... is low"). The previous "Average"
  # statistic averaged across nodes, so a single nearly-full node could be
  # hidden by emptier ones and the alarm would not fire.
  # NOTE(review): AWS documents this metric in MiB - confirm the unit of
  # var.threshold-FreeStorageSpace matches.
  statistic           = "Minimum"
  period              = "300"
  evaluation_periods  = "2"
  comparison_operator = "LessThanThreshold"
  threshold           = var.threshold-FreeStorageSpace

  # Notifications: the same "urgent" target receives both ALARM and OK
  # transitions (presumably an SNS topic ARN - confirm against var.sns-targets).
  # Nothing is sent on INSUFFICIENT_DATA.
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]
  insufficient_data_actions = []

  tags = var.default-tags

  # Tag changes made after creation are not reconciled by Terraform.
  lifecycle {
    ignore_changes = [tags]
  }
}
|