首页 > 解决方案 > 无法通过 SSH 连接到 EC2 实例并且无法放置 ECS 任务

问题描述

给定以下terraform.tf文件:

provider "aws" {
  profile = "default"
  region = "us-east-1"
}


locals {
  # Name prefix shared by the VPC and all dependent resources.
  vpc_name = "some-vpc-name"
  dev_vpn_source = "*.*.*.*/32"  # Instead of * the real config contains the VPN's CIDR block
}

resource "aws_vpc" "vpc" {
  cidr_block = "10.0.0.0/16"
  enable_dns_hostnames = true
  tags = {
    Name: local.vpc_name
  }
}


resource "aws_subnet" "a" {
  cidr_block = "10.0.0.0/17"
  vpc_id = aws_vpc.vpc.id
  tags = {
    Name: "${local.vpc_name}-a"
  }
}

resource "aws_subnet" "b" {
  cidr_block = "10.0.128.0/17"
  vpc_id = aws_vpc.vpc.id
  tags = {
    Name: "${local.vpc_name}-b"
  }
}

resource "aws_security_group" "ssh" {
  name = "${local.vpc_name}-ssh"
  vpc_id = aws_vpc.vpc.id
  tags = {
    Name: "${local.vpc_name}-ssh"
  }
}


resource "aws_security_group_rule" "ingress-ssh" {
  from_port = 22
  protocol = "ssh"
  security_group_id = aws_security_group.ssh.id
  to_port = 22
  type = "ingress"
  cidr_blocks = [local.dev_vpn_source]
  description = "SSH access for developer"
}


resource "aws_security_group" "outbound" {
  name = "${local.vpc_name}-outbound"
  vpc_id = aws_vpc.vpc.id
  tags = {
    Name: "${local.vpc_name}-outbound"
  }
}


resource "aws_security_group_rule" "egress" {
  from_port = 0
  protocol = "all"
  security_group_id = aws_security_group.outbound.id
  to_port = 65535
  type = "egress"
  cidr_blocks = ["0.0.0.0/0"]
  description = "All outbound allowed"
}

module "ecs-clusters" {
  source = "./ecs-clusters/"
  subnets = [aws_subnet.a, aws_subnet.b]
  vpc_name = local.vpc_name
  security_groups = [aws_security_group.ssh, aws_security_group.outbound]
}

以及以下ecs-clusters/ecs-cluster.tf文件:

variable "vpc_name" {
  type = string
}

variable "subnets" {
  type = list(object({
    id: string
  }))
}

variable "security_groups" {
  type = list(object({
    id: string
  }))
}


data "aws_ami" "amazon_linux_ecs" {
  most_recent = true
  owners = ["amazon"]
  filter {
    name   = "name"
    values = ["amzn2-ami-ecs*"]
  }
}

resource "aws_iam_instance_profile" "ecs-launch-profile" {
  name = "${var.vpc_name}-ecs"
  role = "ecsInstanceRole"
}

resource "aws_launch_template" "ecs" {
  name          = "${var.vpc_name}-ecs"
  image_id      = data.aws_ami.amazon_linux_ecs.id
  instance_type = "r5.4xlarge"
  key_name = "some-ssh-key-name"
  iam_instance_profile {
    name = "${var.vpc_name}-ecs"
  }
  block_device_mappings {
    device_name = "/dev/xvda"
    ebs {
      volume_type = "gp3"
      volume_size = 1024
      delete_on_termination = false
    }
  }
  network_interfaces {
    associate_public_ip_address = true
    subnet_id = var.subnets[0].id
    security_groups = var.security_groups[*].id
  }
  update_default_version = true
}

resource "aws_autoscaling_group" "ecs-autoscaling_group" {
  name = "${var.vpc_name}-ecs"
  vpc_zone_identifier = [for subnet in var.subnets: subnet.id]
  desired_capacity   = 1
  max_size           = 1
  min_size           = 1
  protect_from_scale_in = true
  launch_template {
    id = aws_launch_template.ecs.id
    version = aws_launch_template.ecs.latest_version
  }
  tag {
    key = "Name"
    propagate_at_launch = true
    value = "${var.vpc_name}-ecs"
  }
  depends_on = [aws_launch_template.ecs]
}

resource "aws_ecs_capacity_provider" "ecs-capacity-provider" {
  name = var.vpc_name

  auto_scaling_group_provider {
    auto_scaling_group_arn         = aws_autoscaling_group.ecs-autoscaling_group.arn
    managed_termination_protection = "ENABLED"

    managed_scaling {
      maximum_scaling_step_size = 1
      minimum_scaling_step_size = 1
      status                    = "ENABLED"
      target_capacity           = 1
    }
  }
  depends_on = [aws_autoscaling_group.ecs-autoscaling_group]
}


resource "aws_ecs_cluster" "ecs-cluster" {
  name = var.vpc_name
  capacity_providers = [aws_ecs_capacity_provider.ecs-capacity-provider.name]
  depends_on = [aws_ecs_capacity_provider.ecs-capacity-provider]
}

resource "aws_iam_role" "ecs-execution" {
  name = "${var.vpc_name}-ecs-execution"
  assume_role_policy = <<EOF
{
 "Version": "2012-10-17",
 "Statement": [
   {
     "Action": "sts:AssumeRole",
     "Principal": {
       "Service": "ecs-tasks.amazonaws.com"
     },
     "Effect": "Allow",
     "Sid": ""
   }
 ]
}
EOF
}

resource "aws_iam_role" "ecs" {
  name = "${var.vpc_name}-ecs"

  assume_role_policy = <<EOF
{
 "Version": "2012-10-17",
 "Statement": [
   {
     "Action": "sts:AssumeRole",
     "Principal": {
       "Service": "ecs-tasks.amazonaws.com"
     },
     "Effect": "Allow",
     "Sid": ""
   }
 ]
}
EOF
}

resource "aws_iam_role_policy_attachment" "execution-role" {
  role       = aws_iam_role.ecs-execution.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}

resource "aws_iam_role_policy_attachment" "role" {
  role       = aws_iam_role.ecs.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonS3FullAccess"
}

我面临两个问题:无法通过 SSH 连接到 EC2 实例,以及 ECS 任务无法被放置(task placement 失败)。

我究竟做错了什么?

标签: amazon-web-services, amazon-ec2, terraform, amazon-ecs, terraform-provider-aws

解决方案


根据评论。

原来的设置有两个问题

  1. 缺少与 ECS 和 ECR 服务的连接,通过在 VPC 中启用互联网访问解决了这个问题。如果不需要互联网访问,也可以将 VPC 接口端点用于 ECS、ECR 和 S3。
  2. 容器实例未注册到 ECS。这已通过使用 user_data 引导 ECS 实例来修复,使它们能够注册到 ECS 集群。

推荐阅读