# terraform.aws-baseline-infra/examples/emr/main.tf
# (227 lines, 6.5 KiB, Terraform)

# Common name prefix used by the resources in this file:
# "<environment>-<customer-name>".
locals {
  name = format("%s-%s", var.environment, var.customer-name)
}
# EMR cluster running HBase and Phoenix on instance fleets
# (one on-demand master, spot-only core nodes), created through the
# terraform-aws-modules/emr/aws module.
module "emr" {
  source  = "terraform-aws-modules/emr/aws"
  version = "1.2.0"

  # ---------------------------------------------------------------------------
  # Identity and software
  # ---------------------------------------------------------------------------
  name          = "${local.name}-emr"
  release_label = "emr-7.0.0"
  applications  = ["hbase", "phoenix"]

  # At-rest encryption and IMDS settings are defined by the
  # aws_emr_security_configuration.security_config resource.
  security_configuration_name = aws_emr_security_configuration.security_config.name

  # No bootstrap actions are configured.
  bootstrap_action = {
  }

  # HBase tuning passed to EMR as a JSON list of configuration classifications.
  configurations_json = jsonencode([
    {
      Classification = "hbase-env"
      Properties     = {}
      Configurations = [
        {
          Classification = "export"
          Properties = {
            # NOTE(review): c6g.xlarge (used in the core fleet) has 8 GiB of
            # memory in total, so an 8g region server heap leaves no headroom
            # on that instance type - confirm the sizing.
            HBASE_MASTER_OPTS       = "-Xmx4g"
            HBASE_REGIONSERVER_OPTS = "-Xmx8g"
          }
        }
      ]
    },
    {
      Classification = "hbase-site"
      Properties = {
        "hbase.regionserver.handler.count" = "300"
      }
    }
  ])

  # ---------------------------------------------------------------------------
  # Lifecycle
  # ---------------------------------------------------------------------------
  keep_job_flow_alive_when_no_steps = true
  termination_protection            = false
  scale_down_behavior               = "TERMINATE_AT_TASK_COMPLETION"
  step_concurrency_level            = 3
  visible_to_all_users              = true

  auto_termination_policy = {
    # presumably seconds (i.e. one hour of idleness) - confirm against the module docs
    idle_timeout = 3600
  }

  list_steps_states = ["PENDING", "RUNNING", "CANCEL_PENDING", "CANCELLED", "FAILED", "INTERRUPTED", "COMPLETED"]

  # Cluster logs are written to the bucket created by module.s3_bucket.
  log_uri = "s3n://${module.s3_bucket.s3_bucket_id}/"

  # ---------------------------------------------------------------------------
  # Networking
  # ---------------------------------------------------------------------------
  # NOTE(review): the subnet, VPC and key pair identifiers are hard-coded;
  # consider turning them into input variables.
  vpc_id = "vpc-01a10b033169f89a8"

  # Subnets should be tagged with
  # { "for-use-with-amazon-emr-managed-policies" = true }
  ec2_attributes = {
    key_name   = "kf-key"
    subnet_ids = ["subnet-08dec6787782ee087", "subnet-0551e96ffd016192a"]
  }

  # Required for creating public cluster
  is_private_cluster = false

  # ---------------------------------------------------------------------------
  # Instance fleets
  # ---------------------------------------------------------------------------
  ebs_root_volume_size = 20

  # Single on-demand master node.
  master_instance_fleet = {
    name                      = "master-fleet"
    target_on_demand_capacity = 1

    instance_type_configs = [
      {
        instance_type = "c6g.xlarge"
        ebs_config = {
          type                 = "gp3"
          size                 = 20
          volumes_per_instance = 1
        }
      }
    ]
  }

  # Core nodes run on spot capacity only; two instance types are offered,
  # each bid at 70% of the on-demand price.
  core_instance_fleet = {
    name                      = "core-fleet"
    target_on_demand_capacity = 0
    target_spot_capacity      = 1

    instance_type_configs = [
      {
        instance_type                              = "c6g.xlarge"
        weighted_capacity                          = 1
        bid_price_as_percentage_of_on_demand_price = 70
        ebs_config = {
          type                 = "gp3"
          size                 = 20
          volumes_per_instance = 1
        }
      },
      {
        instance_type                              = "m6g.xlarge"
        weighted_capacity                          = 1
        bid_price_as_percentage_of_on_demand_price = 70
        ebs_config = {
          type                 = "gp3"
          size                 = 20
          volumes_per_instance = 1
        }
      }
    ]

    launch_specifications = {
      spot_specification = {
        allocation_strategy      = "capacity-optimized"
        block_duration_minutes   = 0
        timeout_duration_minutes = 5
        timeout_action           = "SWITCH_TO_ON_DEMAND"
      }
    }
  }

  # Use managed scaling policy to refill spot instances
  managed_scaling_policy = {
    unit_type                       = "InstanceFleetUnits"
    minimum_capacity_units          = 1
    maximum_capacity_units          = 4
    maximum_core_capacity_units     = 4
    maximum_ondemand_capacity_units = 0
  }

  # ---------------------------------------------------------------------------
  # IAM
  # ---------------------------------------------------------------------------
  # NOTE(review): PowerUserAccess is a very broad AWS managed policy and is
  # attached to both the service role and the instance profile. Consider
  # replacing it with a least-privilege policy covering what the cluster
  # actually needs.
  service_iam_role_policies = {
    AmazonEMRServicePolicy_v2 = "arn:aws:iam::aws:policy/service-role/AmazonEMRServicePolicy_v2"
    PowerUser                 = "arn:aws:iam::aws:policy/PowerUserAccess"
  }

  iam_instance_profile_policies = {
    AmazonElasticMapReduceforEC2Role = "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role"
    PowerUser                        = "arn:aws:iam::aws:policy/PowerUserAccess"
  }
}
# Two random bytes; the decimal form is appended to the log bucket name
# in module.s3_bucket.
resource "random_id" "this" {
  byte_length = 2
}
# S3 bucket that receives the EMR cluster logs (see log_uri on module.emr).
module "s3_bucket" {
  source  = "terraform-aws-modules/s3-bucket/aws"
  version = "~> 3.0"

  # The random suffix comes from random_id.this.
  bucket = format("%s-emrlogs-%s", local.name, random_id.this.dec)

  # Allow deletion of non-empty bucket
  # Example usage only - not recommended for production
  force_destroy = true

  # Public access block: all four switches enabled.
  block_public_acls       = true
  ignore_public_acls      = true
  block_public_policy     = true
  restrict_public_buckets = true

  # Bucket policy attachments provided by the module.
  attach_deny_insecure_transport_policy = true
  attach_require_latest_tls_policy      = true

  # Default server-side encryption with S3-managed keys.
  server_side_encryption_configuration = {
    rule = {
      apply_server_side_encryption_by_default = {
        sse_algorithm = "AES256"
      }
    }
  }
}
# Customer-managed KMS key used for local disk / EBS encryption; its ARN is
# referenced by aws_emr_security_configuration.security_config.
resource "aws_kms_key" "ebs" {
  description             = "KMS key for EBS volumes"
  deletion_window_in_days = 7

  # Rotate the key material automatically. Rotation is off by default for
  # customer-managed keys, and enabling it does not change the key ARN or
  # require re-encrypting existing data.
  enable_key_rotation = true
}
# EMR security configuration attached to module.emr:
# - at-rest encryption is on (local disks / EBS with the KMS key, S3 with SSE-S3)
# - in-transit encryption is off
# - instance metadata service settings are pinned (version 2, hop limit 1)
resource "aws_emr_security_configuration" "security_config" {
  name = "${local.name}-emr-security-config"

  configuration = jsonencode({
    InstanceMetadataServiceConfiguration = {
      MinimumInstanceMetadataServiceVersion = 2
      HttpPutResponseHopLimit               = 1
    }

    EncryptionConfiguration = {
      EnableInTransitEncryption = false
      EnableAtRestEncryption    = true

      AtRestEncryptionConfiguration = {
        S3EncryptionConfiguration = {
          EncryptionMode = "SSE-S3"
        }

        LocalDiskEncryptionConfiguration = {
          EncryptionKeyProviderType = "AwsKms"
          AwsKmsKey                 = aws_kms_key.ebs.arn
          EnableEbsEncryption       = true
        }
      }
    }
  })
}
# Tag EMR master and core instances.
# This layer needs to be run twice to set the instance tags.
# Adding depends_on will result in a dependency loop.
# Looks up running EC2 instances carrying the EMR "MASTER" role tag.
# NOTE(review): the filter does not include a cluster ID, so instances of any
# other EMR cluster visible to this provider would presumably match too - confirm.
data "aws_instances" "master_instances" {
  # depends_on = [module.emr]
  instance_state_names = ["running"]

  instance_tags = {
    "aws:elasticmapreduce:instance-group-role" = "MASTER"
  }
}
# Looks up running EC2 instances carrying the EMR "CORE" role tag.
# NOTE(review): the filter does not include a cluster ID, so instances of any
# other EMR cluster visible to this provider would presumably match too - confirm.
data "aws_instances" "core_instances" {
  # depends_on = [module.emr]
  instance_state_names = ["running"]

  instance_tags = {
    "aws:elasticmapreduce:instance-group-role" = "CORE"
  }
}
# Sets a "Name" tag on every core instance found by
# data.aws_instances.core_instances. IDs are sorted so that the numbering
# (1-based) is stable for a given set of instances.
resource "aws_ec2_tag" "tag-emr-core-instances" {
  # depends_on = [data.aws_instances.core_instances]
  count = length(data.aws_instances.core_instances.ids)

  resource_id = element(sort(data.aws_instances.core_instances.ids), count.index)
  key         = "Name"
  value       = format("%s-emr-core-%d", local.name, count.index + 1)
}
# Sets a "Name" tag on every master instance found by
# data.aws_instances.master_instances. IDs are sorted so that the numbering
# (1-based) is stable for a given set of instances.
resource "aws_ec2_tag" "tag-emr-master-instances" {
  # depends_on = [data.aws_instances.master_instances]
  count = length(data.aws_instances.master_instances.ids)

  resource_id = element(sort(data.aws_instances.master_instances.ids), count.index)
  key         = "Name"
  value       = format("%s-emr-master-%d", local.name, count.index + 1)
}