2022-10-26 11:13:56 +08:00
# Identity of the credentials Terraform is running with. Its `id` attribute is
# used as the ClientId dimension of every alarm in this file.
data "aws_caller_identity" "this" {}
# Alarm on the Average of the AWS/ES "CPUUtilization" metric for the domain
# var.domain-name: breaches when the value is greater than
# var.threshold-CPUUtilization over 3 evaluation periods of 1800 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "ES-CPUUtilization" {
  alarm_name                = "${var.cw-alarm-prefix}:ES:CPUUtilization:${var.domain-name}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = 3
  metric_name               = "CPUUtilization"
  period                    = 1800
  statistic                 = "Average"
  threshold                 = var.threshold-CPUUtilization
  alarm_description         = "ES:CPUUtilization"
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}
# Alarm on the Average of the AWS/ES "SearchLatency" metric for the domain
# var.domain-name: breaches when the value is greater than
# var.threshold-SearchLatency over 3 evaluation periods of 1800 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "ES-SearchLatency" {
  alarm_name                = "${var.cw-alarm-prefix}:ES:SearchLatency:${var.domain-name}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = 3
  metric_name               = "SearchLatency"
  period                    = 1800
  statistic                 = "Average"
  threshold                 = var.threshold-SearchLatency
  alarm_description         = "ES:SearchLatency"
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}
# Alarm on the Average of the AWS/ES "IndexingLatency" metric for the domain
# var.domain-name: breaches when the value is greater than
# var.threshold-IndexingLatency over 3 evaluation periods of 1800 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "ES-IndexingLatency" {
  alarm_name                = "${var.cw-alarm-prefix}:ES:IndexingLatency:${var.domain-name}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = 3
  metric_name               = "IndexingLatency"
  period                    = 1800
  statistic                 = "Average"
  threshold                 = var.threshold-IndexingLatency
  alarm_description         = "ES:IndexingLatency"
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}
# Alarm on the Maximum of the AWS/ES "ClusterStatus.red" metric for the domain
# var.domain-name: breaches when the value is greater than 0 over
# 2 evaluation periods of 900 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "ES-ClusterStatusRed" {
  alarm_name                = "${var.cw-alarm-prefix}:ES:ClusterStatusRed:${var.domain-name}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = 2
  metric_name               = "ClusterStatus.red"
  period                    = 900
  statistic                 = "Maximum"
  threshold                 = 0
  alarm_description         = "At least one primary shard and its replicas aren't allocated to a node."
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}
2022-12-19 14:45:23 +08:00
# Alarm on the Average of the AWS/ES "ThreadpoolWriteQueue" metric for the
# domain var.domain-name: breaches when the value is greater than
# var.threshold-ThreadpoolWriteQueue over 2 evaluation periods of 60 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-standard.
resource "aws_cloudwatch_metric_alarm" "ES-ThreadpoolWriteQueue" {
  alarm_name                = "${var.cw-alarm-prefix}:ES:ThreadpoolWriteQueue:${var.domain-name}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = 2
  metric_name               = "ThreadpoolWriteQueue"
  period                    = 60
  statistic                 = "Average"
  threshold                 = var.threshold-ThreadpoolWriteQueue
  alarm_description         = "The cluster is experiencing high indexing concurrency. Review and control indexing requests, or increase cluster resources."
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-standard]
  ok_actions                = [var.sns-targets.alarm-actions-standard]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}
# Alarm on the Average of the AWS/ES "ThreadpoolSearchQueue" metric for the
# domain var.domain-name: breaches when the value is greater than
# var.threshold-ThreadpoolSearchQueue over 2 evaluation periods of 60 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-standard.
resource "aws_cloudwatch_metric_alarm" "ES-ThreadpoolSearchQueue" {
  alarm_name                = "${var.cw-alarm-prefix}:ES:ThreadpoolSearchQueue:${var.domain-name}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = 2
  metric_name               = "ThreadpoolSearchQueue"
  period                    = 60
  statistic                 = "Average"
  threshold                 = var.threshold-ThreadpoolSearchQueue
  alarm_description         = "The cluster is experiencing high search concurrency. Consider scaling your cluster. You can also increase the search queue size, but increasing it excessively can cause out of memory errors."
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-standard]
  ok_actions                = [var.sns-targets.alarm-actions-standard]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}
# Alarm on the Average of the AWS/ES "ThreadpoolSearchRejected" metric for the
# domain var.domain-name: breaches when the value is greater than
# var.threshold-ThreadpoolSearchRejected over 2 evaluation periods of 60 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "ES-ThreadpoolSearchRejected" {
  alarm_name                = "${var.cw-alarm-prefix}:ES:ThreadpoolSearchRejected:${var.domain-name}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = 2
  metric_name               = "ThreadpoolSearchRejected"
  period                    = 60
  statistic                 = "Average"
  threshold                 = var.threshold-ThreadpoolSearchRejected
  alarm_description         = "These alarms notify you of domain issues that might impact performance and stability."
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}
# Alarm on the Average of the AWS/ES "ThreadpoolWriteRejected" metric for the
# domain var.domain-name: breaches when the value is greater than
# var.threshold-ThreadpoolWriteRejected over 2 evaluation periods of 60 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "ES-ThreadpoolWriteRejected" {
  alarm_name                = "${var.cw-alarm-prefix}:ES:ThreadpoolWriteRejected:${var.domain-name}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = 2
  metric_name               = "ThreadpoolWriteRejected"
  period                    = 60
  statistic                 = "Average"
  threshold                 = var.threshold-ThreadpoolWriteRejected
  alarm_description         = "These alarms notify you of domain issues that might impact performance and stability."
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}
# Alarm on the Average of the AWS/ES "MasterCPUUtilization" metric for the
# domain var.domain-name: breaches when the value is greater than
# var.threshold-MasterCPUUtilization over 2 evaluation periods of 300 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "ES-MasterCPUUtilization" {
  alarm_name                = "${var.cw-alarm-prefix}:ES:MasterCPUUtilization:${var.domain-name}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = 2
  metric_name               = "MasterCPUUtilization"
  period                    = 300
  statistic                 = "Average"
  threshold                 = var.threshold-MasterCPUUtilization
  alarm_description         = "MasterCPUUtilization"
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}
# Alarm on the Average of the AWS/ES "MasterJVMMemoryPressure" metric for the
# domain var.domain-name: breaches when the value is greater than
# var.threshold-MasterJVMMemoryPressure over 2 evaluation periods of 60 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "ES-MasterJVMMemoryPressure" {
  alarm_name                = "${var.cw-alarm-prefix}:ES:MasterJVMMemoryPressure:${var.domain-name}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = 2
  metric_name               = "MasterJVMMemoryPressure"
  period                    = 60
  statistic                 = "Average"
  threshold                 = var.threshold-MasterJVMMemoryPressure
  alarm_description         = "MasterJVMMemoryPressure"
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}
# Alarm on the Average of the AWS/ES "JVMMemoryPressure" metric for the domain
# var.domain-name: breaches when the value is greater than
# var.threshold-JVMMemoryPressure over 2 evaluation periods of 60 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "ES-JVMMemoryPressure" {
  alarm_name                = "${var.cw-alarm-prefix}:ES:JVMMemoryPressure:${var.domain-name}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = 2
  metric_name               = "JVMMemoryPressure"
  period                    = 60
  statistic                 = "Average"
  threshold                 = var.threshold-JVMMemoryPressure
  alarm_description         = "The cluster could encounter out of memory errors if usage increases. Consider scaling vertically. OpenSearch Service uses half of an instance's RAM for the Java heap, up to a heap size of 32 GiB. You can scale instances vertically up to 64 GiB of RAM, at which point you can scale horizontally by adding instances."
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}
# Alarm on the Average of the AWS/ES "ClusterIndexWritesBlocked" metric for the
# domain var.domain-name: breaches when the value is greater than
# var.threshold-ClusterIndexWritesBlocked over 2 evaluation periods of 60 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "ES-ClusterIndexWritesBlocked" {
  alarm_name                = "${var.cw-alarm-prefix}:ES:ClusterIndexWritesBlocked:${var.domain-name}"
  comparison_operator       = "GreaterThanThreshold"
  evaluation_periods        = 2
  metric_name               = "ClusterIndexWritesBlocked"
  period                    = 60
  statistic                 = "Average"
  threshold                 = var.threshold-ClusterIndexWritesBlocked
  alarm_description         = "Your cluster is blocking write requests. See ClusterBlockException."
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}
# Alarm on the Minimum of the AWS/ES "FreeStorageSpace" metric for the domain
# var.domain-name: breaches when the value is less than
# var.threshold-FreeStorageSpace over 2 evaluation periods of 300 seconds.
# ALARM and OK transitions are both sent to var.sns-targets.alarm-actions-urgent.
resource "aws_cloudwatch_metric_alarm" "ES-FreeStorageSpace" {
  alarm_name          = "${var.cw-alarm-prefix}:ES:FreeStorageSpace:${var.domain-name}"
  comparison_operator = "LessThanThreshold"
  evaluation_periods  = 2
  metric_name         = "FreeStorageSpace"
  period              = 300

  # Minimum rather than Average: the description is about a single node running
  # low, and an average across nodes can stay above the threshold while one
  # node is almost full. NOTE(review): this makes the alarm more sensitive than
  # before; confirm var.threshold-FreeStorageSpace is meant as a per-node value.
  statistic = "Minimum"

  threshold                 = var.threshold-FreeStorageSpace
  alarm_description         = "A node in your cluster is low on free storage space."
  namespace                 = "AWS/ES"
  insufficient_data_actions = []
  actions_enabled           = var.actions-enabled
  alarm_actions             = [var.sns-targets.alarm-actions-urgent]
  ok_actions                = [var.sns-targets.alarm-actions-urgent]

  # The metric is selected by domain name plus the caller identity's id.
  dimensions = {
    DomainName = var.domain-name
    ClientId   = data.aws_caller_identity.this.id
  }

  tags = var.default-tags

  # Tags are set from var.default-tags; later changes to them are ignored.
  lifecycle {
    ignore_changes = [tags]
  }
}