From 3dd3f0b2c3542a64a42ecca7741391fd3e003817 Mon Sep 17 00:00:00 2001
From: xpk
Date: Sat, 6 Jan 2024 00:34:25 +0800
Subject: [PATCH] NEW: working example of EMR with spot instances

---
 experimental/emr/main.tf          | 210 ++++++++++++++++++++++++++++++
 experimental/emr/outputs.tf       |   9 ++
 experimental/emr/provider.tf      |  31 +++++
 experimental/emr/terraform.tfvars |   6 +
 experimental/emr/variables.tf     |  31 +++++
 5 files changed, 287 insertions(+)
 create mode 100644 experimental/emr/main.tf
 create mode 100644 experimental/emr/outputs.tf
 create mode 100644 experimental/emr/provider.tf
 create mode 100644 experimental/emr/terraform.tfvars
 create mode 100644 experimental/emr/variables.tf

diff --git a/experimental/emr/main.tf b/experimental/emr/main.tf
new file mode 100644
index 0000000..77d4fe8
--- /dev/null
+++ b/experimental/emr/main.tf
@@ -0,0 +1,210 @@
+# Naming prefix shared by the resources in this file: "<environment>-<customer-name>".
+locals {
+  name = "${var.environment}-${var.customer-name}"
+}
+
+# EMR cluster running HBase and Phoenix: a single on-demand master plus a
+# spot-backed core fleet that switches to on-demand when spot capacity is
+# not provisioned within the timeout.
+module "emr" {
+  source  = "terraform-aws-modules/emr/aws"
+  version = "1.2.0"
+
+  name                        = "${local.name}-emr"
+  release_label               = "emr-7.0.0"
+  security_configuration_name = aws_emr_security_configuration.security_config.name
+  applications                = ["hbase", "phoenix"]
+  auto_termination_policy = {
+    idle_timeout = 3600
+  }
+
+  bootstrap_action = {
+  }
+
+  configurations_json = jsonencode([
+    {
+      Classification : "hbase-env",
+      Configurations : [
+        {
+          "Classification" : "export",
+          "Properties" : {
+            "HBASE_MASTER_OPTS" : "-Xmx4g",
+            "HBASE_REGIONSERVER_OPTS" : "-Xmx8g"
+          }
+        }
+      ],
+      Properties : {}
+    },
+    {
+      Classification : "hbase-site",
+      Properties : {
+        "hbase.regionserver.handler.count" : "300"
+      }
+    }
+  ])
+
+  master_instance_fleet = {
+    name                      = "master-fleet"
+    target_on_demand_capacity = 1
+    instance_type_configs = [
+      {
+        instance_type = "c6g.xlarge"
+      }
+    ]
+  }
+
+  core_instance_fleet = {
+    name                      = "core-fleet"
+    target_on_demand_capacity = 0
+    target_spot_capacity      = 2
+    instance_type_configs = [
+      {
+        bid_price_as_percentage_of_on_demand_price = 70
+        instance_type                              = "c6g.xlarge"
+        weighted_capacity                          = 1
+        ebs_config = {
+          size                 = 20
+          type                 = "gp3"
+          volumes_per_instance = 1
+        }
+      },
+      {
+        bid_price_as_percentage_of_on_demand_price = 70
+        instance_type                              = "m6g.xlarge"
+        weighted_capacity                          = 1
+        ebs_config = {
+          size                 = 20
+          type                 = "gp3"
+          volumes_per_instance = 1
+        }
+      }
+    ]
+    launch_specifications = {
+      spot_specification = {
+        allocation_strategy      = "capacity-optimized"
+        block_duration_minutes   = 0
+        timeout_action           = "SWITCH_TO_ON_DEMAND"
+        timeout_duration_minutes = 5
+      }
+    }
+  }
+
+  ebs_root_volume_size = 20
+  # Subnets should be tagged with
+  # { "for-use-with-amazon-emr-managed-policies" = true }
+  ec2_attributes = {
+    subnet_ids = ["subnet-08dec6787782ee087", "subnet-0551e96ffd016192a"]
+    key_name   = "kf-key"
+  }
+  vpc_id = "vpc-01a10b033169f89a8"
+
+  # Required for creating public cluster
+  is_private_cluster = false
+
+  keep_job_flow_alive_when_no_steps = true
+  list_steps_states                 = ["PENDING", "RUNNING", "CANCEL_PENDING", "CANCELLED", "FAILED", "INTERRUPTED", "COMPLETED"]
+  log_uri                           = "s3n://${module.s3_bucket.s3_bucket_id}/"
+
+  scale_down_behavior    = "TERMINATE_AT_TASK_COMPLETION"
+  step_concurrency_level = 3
+  termination_protection = false
+  visible_to_all_users   = true
+  service_iam_role_policies = {
+    AmazonEMRServicePolicy_v2 = "arn:aws:iam::aws:policy/service-role/AmazonEMRServicePolicy_v2"
+    # NOTE(review): PowerUserAccess is far broader than an EMR service role
+    # normally needs - confirm it is required, or scope it down.
+    PowerUser = "arn:aws:iam::aws:policy/PowerUserAccess"
+  }
+}
+
+# Random suffix for the log bucket name (2 random bytes, used in decimal form).
+resource "random_id" "this" {
+  byte_length = 2
+}
+
+# S3 bucket that receives the EMR cluster logs (referenced by log_uri above).
+module "s3_bucket" {
+  source  = "terraform-aws-modules/s3-bucket/aws"
+  version = "~> 3.0"
+
+  bucket = "${local.name}-emrlogs-${random_id.this.dec}"
+
+  # Allow deletion of non-empty bucket
+  # Example usage only - not recommended for production
+  force_destroy = true
+
+  attach_deny_insecure_transport_policy = true
+  attach_require_latest_tls_policy      = true
+
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+
+  server_side_encryption_configuration = {
+    rule = {
+      apply_server_side_encryption_by_default = {
+        sse_algorithm = "AES256"
+      }
+    }
+  }
+}
+
+# EMR security configuration attached to the cluster: at-rest encryption
+# enabled with S3 encryption mode SSE-S3; in-transit encryption disabled.
+resource "aws_emr_security_configuration" "security_config" {
+  name = "${local.name}-emr-security-config"
+
+  configuration = jsonencode(
+    {
+      "EncryptionConfiguration" : {
+        "AtRestEncryptionConfiguration" : {
+          "S3EncryptionConfiguration" : {
+            "EncryptionMode" : "SSE-S3"
+          },
+        },
+        "EnableInTransitEncryption" : false,
+        "EnableAtRestEncryption" : true
+      }
+    }
+  )
+}
+
+# Tag EMR master and core instances
+# Need to run this layer twice to set instance tags
+# Adding depends_on will result in a dependency loop
+# NOTE(review): these lookups match every running instance visible to the
+# provider that carries the role tag, not only this cluster's instances.
+data "aws_instances" "master_instances" {
+  # depends_on = [module.emr]
+  instance_tags = {
+    "aws:elasticmapreduce:instance-group-role" = "MASTER"
+  }
+  instance_state_names = ["running"]
+}
+
+data "aws_instances" "core_instances" {
+  # depends_on = [module.emr]
+  instance_tags = {
+    "aws:elasticmapreduce:instance-group-role" = "CORE"
+  }
+  instance_state_names = ["running"]
+}
+
+# Name tag for each core instance: "<name>-emr-core-<n>", numbered by sorted instance ID.
+resource "aws_ec2_tag" "tag-emr-core-instances" {
+  # depends_on = [data.aws_instances.core_instances]
+  count       = length(data.aws_instances.core_instances.ids)
+  resource_id = sort(data.aws_instances.core_instances.ids)[count.index]
+  key         = "Name"
+  value       = "${local.name}-emr-core-${count.index + 1}"
+}
+
+# Name tag for each master instance: "<name>-emr-master-<n>", numbered by sorted instance ID.
+resource "aws_ec2_tag" "tag-emr-master-instances" {
+  # depends_on = [data.aws_instances.master_instances]
+  count       = length(data.aws_instances.master_instances.ids)
+  resource_id = sort(data.aws_instances.master_instances.ids)[count.index]
+  key         = "Name"
+  value       = "${local.name}-emr-master-${count.index + 1}"
+}
diff --git a/experimental/emr/outputs.tf b/experimental/emr/outputs.tf
new file mode 100644
index 0000000..6878b2d
--- /dev/null
+++ b/experimental/emr/outputs.tf
@@ -0,0 +1,9 @@
+output "core_instance_ids" {
+  description = "IDs of running instances tagged with the EMR CORE instance-group role."
+  value       = data.aws_instances.core_instances.ids
+}
+
+output "master_instance_ids" {
+  description = "IDs of running instances tagged with the EMR MASTER instance-group role."
+  value       = data.aws_instances.master_instances.ids
+}
diff --git a/experimental/emr/provider.tf b/experimental/emr/provider.tf
new file mode 100644
index 0000000..e0ccfdc
--- /dev/null
+++ b/experimental/emr/provider.tf
@@ -0,0 +1,31 @@
+# AWS provider; default_tags are applied to every taggable resource it creates.
+provider "aws" {
+  region = var.aws-region
+  default_tags {
+    tags = {
+      ServiceProvider = "RackspaceTechnology"
+      Environment     = var.environment
+      Project         = var.project
+      Application     = var.application
+      TerraformMode   = "managed"
+      # Last two path components of the directory Terraform is run from.
+      TerraformDir = "${reverse(split("/", path.cwd))[1]}/${reverse(split("/", path.cwd))[0]}"
+    }
+  }
+}
+
+terraform {
+  required_version = ">= 1.3.0"
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0.0"
+    }
+    # Used by random_id in main.tf; declared explicitly instead of relying
+    # on Terraform's implicit hashicorp/* provider lookup.
+    random = {
+      source  = "hashicorp/random"
+      version = ">= 3.0"
+    }
+  }
+}
diff --git a/experimental/emr/terraform.tfvars b/experimental/emr/terraform.tfvars
new file mode 100644
index 0000000..3a0f989
--- /dev/null
+++ b/experimental/emr/terraform.tfvars
@@ -0,0 +1,6 @@
+aws-region = "ap-east-1"
+# aws-region-short = "ape1"
+customer-name = "ken2026"
+environment   = "lab"
+project       = "iac"
+application   = "emr"
diff --git a/experimental/emr/variables.tf b/experimental/emr/variables.tf
new file mode 100644
index 0000000..7250bed
--- /dev/null
+++ b/experimental/emr/variables.tf
@@ -0,0 +1,31 @@
+variable "aws-region" {
+  description = "AWS region used by the aws provider."
+  type        = string
+}
+
+# variable "aws-region-short" {}
+
+variable "customer-name" {
+  description = "Customer name; part of resource names."
+  type        = string
+}
+
+variable "environment" {
+  description = "Environment name; part of resource names and the Environment default tag."
+  type        = string
+}
+
+variable "project" {
+  description = "Project name; value of the Project default tag."
+  type        = string
+}
+
+variable "application" {
+  description = "Application name; value of the Application default tag."
+  type        = string
+}
+
+# NOTE(review): resource-prefix is not referenced by any file in this patch.
+locals {
+  resource-prefix = "${var.environment}-${substr(var.aws-region, 0, 2)}-${var.customer-name}-${var.project}"
+}