首页 > 解决方案 > 尝试在登陆区设置中创建 Azure Databricks 群集时,Terraform 抛出错误

问题描述

我正在尝试使用 Terraform 部署 Azure Databricks 工作区和集群。工作区与用户组一起成功创建,我也能够成功登录 Databricks。我遇到的问题出现在创建集群时:terraform apply 在大约 20 分钟后失败。在我的工作区 Terraform 代码中,“public_network_access_enabled”设置为 true。

如果我查看 Azure Databricks 的控制台,会在 Compute 下看到该集群处于挂起(Pending)状态,并保持这一状态大约 20 分钟,随后集群消失,terraform apply 抛出下面的错误消息。

仅供参考:我们在登陆区(landing zone)中运行。我为 Databricks 创建了两个子网(一个私有子网和一个公共子网),使用 /26;该 vnet 与中心(hub)vnet 对等,与任何登陆区中的做法一样。

1114-001756-9y2ijklp is not able to transition from TERMINATED to RUNNING: Instance was not reachable. This can be a transient networking issue. If the problem persists, this usually indicates a network environment misconfiguration. Please check your cloud provider config..., Termination info : code: INSTANCE_UNREACHABLE, type: , parameters: map[databricks_error_message:
    Instance was not reachable.
    VM extension code: ProvisioningState/succeded
    instanceId: InstanceId(939d10dc729547c5bedb7cf32be7ecd1)
    workerEnv: workerenv-3791028417964786

其他详细信息(可能会被截断):

其余错误附在屏幕截图中。

另外说明一下:我尝试从 Azure 门户以交互方式登录 Databricks 并手动创建集群,创建成功了。这里的“成功”是指我至少能看到这个集群,而使用 Terraform 部署时是看不到的。但问题是,当我尝试启动该集群时,它会开始启动,但大约 10 分钟后就显示已终止。

如果我检查 Databricks 中的事件日志,它会显示:

集群终止。原因:实例无法访问

我的 Terraform 代码:

# Premium-SKU Azure Databricks workspace deployed into the project's own
# virtual network (the "centralus" instance of the virtualnetwork module).
resource "azurerm_databricks_workspace" "db-workspace" {
  # Identity and placement.
  name                = module.names-db-workspace.environment.databricks_workspace.name_unique
  location            = module.resourcegroup.resource_group.location
  resource_group_name = module.resourcegroup.resource_group.name
  tags                = local.tags

  # Workspace tier and front-end exposure.
  sku                           = "premium"
  public_network_access_enabled = true

  # Network wiring: the workspace uses the two Databricks subnets and their
  # NSG associations produced by the virtualnetwork module.
  custom_parameters {
    virtual_network_id = module.virtualnetwork["centralus"].virtual_network.self.id

    # NOTE(review): with no_public_ip = false the cluster nodes presumably get
    # public IPs; in a hub-routed landing zone verify that this traffic path
    # is actually permitted — confirm against the hub routing/firewall setup.
    no_public_ip = false

    # Public subnet and its NSG association.
    public_subnet_name                                  = module.virtualnetwork["centralus"].virtual_network.subnets["db-sub-1-public"].name
    public_subnet_network_security_group_association_id = module.virtualnetwork["centralus"].virtual_network.nsgs.associations.subnets["databricks-public-nsg-db-sub-1-public"].id

    # Private subnet and its NSG association.
    private_subnet_name                                  = module.virtualnetwork["centralus"].virtual_network.subnets["db-sub-2-private"].name
    private_subnet_network_security_group_association_id = module.virtualnetwork["centralus"].virtual_network.nsgs.associations.subnets["databricks-private-nsg-db-sub-2-private"].id
  }
}

# Autoscaling Databricks cluster (1 to 7 workers) that is created only after
# the workspace resource exists.
resource "databricks_cluster" "dbcselfservice" {
  # Explicit ordering: this resource has no attribute reference to the workspace.
  depends_on = [azurerm_databricks_workspace.db-workspace]

  # Name is built from the project name and environment name.
  cluster_name = format("adb-cluster-%s-%s", var.project.name, var.project.environment.name)

  # Runtime and VM size are supplied by input variables.
  spark_version = var.spark_version
  node_type_id  = var.node_type_id

  autotermination_minutes = 20

  autoscale {
    max_workers = 7
    min_workers = 1
  }

  # Spot-instance settings for the Azure VMs backing the cluster.
  azure_attributes {
    availability       = "SPOT_AZURE"
    spot_bid_max_price = 100
    first_on_demand    = 1
  }
}




locals {
  vnet = {
    enable = true
    subnets = {
      # General-purpose subnet: first of the four equal slices cut from the
      # "centralus-default" address space (2 extra prefix bits, index 0).
      general = {
        cidrs = [cidrsubnet(var.project.cidrs["centralus-default"][0], 2, 0)]
        private = {
          endpoint = true
          service  = false
        }
        service = {
          # No delegation on this subnet.
          delegations = {}
          # Service endpoints enabled on this subnet.
          endpoints = [
            "Microsoft.KeyVault",
            "Microsoft.Storage",
            "Microsoft.Web",
            "Microsoft.EventHub",
            "Microsoft.Sql",
            "Microsoft.AzureCosmosDB",
          ]
        }
      }
      # Web-app subnet: second slice of the "centralus-default" address space
      # (index 1), delegated to Microsoft.Web/serverFarms.
      webapp = {
        cidrs = [cidrsubnet(var.project.cidrs["centralus-default"][0], 2, 1)]
        private = {
          endpoint = false
          service  = false
        }
        service = {
          endpoints = []
          delegations = {
            "Microsoft.Web/serverFarms" = {
              name    = "Microsoft.Web/serverFarms"
              actions = ["Microsoft.Network/virtualNetworks/subnets/action"]
            }
          }
        }
      }
      # waf = {
      #   cidrs   = [cidrsubnet(var.project.cidrs["centralus-default"][0], 4, 8)]
      #   private = { endpoint = false, service = false }
      #   service = { endpoints = [], delegations = {}
      #   }
      # }
      # Public subnet used by the Azure Databricks workspace: third slice of
      # the "centralus-default" address space (2 extra prefix bits, index 2),
      # delegated to Microsoft.Databricks/workspaces.
      db-sub-1-public = {
        cidrs   = [cidrsubnet(var.project.cidrs["centralus-default"][0], 2, 2)]
        private = { endpoint = false, service = false }
        service = {
          endpoints = [],
          delegations = {
            "Microsoft.Databricks/workspaces" = {
              # The Databricks delegation is declared with all three subnet
              # actions, matching the official azurerm example; omitting
              # unprepareNetworkPolicies can leave the declared list out of
              # sync with what Azure reports for the delegation.
              # NOTE(review): assumes the virtual-network module passes
              # `actions` through to the subnet delegation unchanged — confirm.
              actions = [
                "Microsoft.Network/virtualNetworks/subnets/join/action",
                "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action",
                "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action",
              ]
              name = "Microsoft.Databricks/workspaces"
            }
          }
        }
      }
      # Private subnet used by the Azure Databricks workspace: fourth slice of
      # the "centralus-default" address space (2 extra prefix bits, index 3),
      # delegated to Microsoft.Databricks/workspaces.
      db-sub-2-private = {
        cidrs   = [cidrsubnet(var.project.cidrs["centralus-default"][0], 2, 3)]
        private = { endpoint = false, service = false }
        service = {
          endpoints = [],
          delegations = {
            "Microsoft.Databricks/workspaces" = {
              # The Databricks delegation is declared with all three subnet
              # actions, matching the official azurerm example; omitting
              # unprepareNetworkPolicies can leave the declared list out of
              # sync with what Azure reports for the delegation.
              # NOTE(review): assumes the virtual-network module passes
              # `actions` through to the subnet delegation unchanged — confirm.
              actions = [
                "Microsoft.Network/virtualNetworks/subnets/join/action",
                "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action",
                "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action",
              ]
              name = "Microsoft.Databricks/workspaces"
            }
          }
        }
      }
    }
    asgs = {}
    nsgs = {
      # Network security group definition for the "db-sub-1-public" subnet.
      # `rules` is currently an EMPTY map: every rule below is commented out.
      # NOTE(review): presumably the rules Azure Databricks needs are expected
      # to be supplied by the platform through the subnet delegation rather
      # than by this map — confirm, since the cluster fails with
      # INSTANCE_UNREACHABLE.
      # NOTE(review): the commented-out rules use the key
      # "databricks-worker-within-cluster" twice (inbound, priority 100 and
      # outbound, priority 108); give them distinct keys before re-enabling.
      databricks-public-nsg = {
        subnets = ["db-sub-1-public"]
        rules = {
          #   "databricks-worker-to-webapp" = {
          #     priority  = 101
          #     access    = "Allow"
          #     direction = "Outbound"
          #     protocol  = "tcp"
          #     destination = {
          #       ports  = ["443"]
          #       asgs   = []
          #       prefix = "AzureDatabricks"
          #     }
          #     source = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #   }
          #   "databricks-worker-to-storage" = {
          #     priority  = 104
          #     access    = "Allow"
          #     direction = "Outbound"
          #     protocol  = "tcp"
          #     destination = {
          #       ports  = ["443"]
          #       asgs   = []
          #       prefix = "Storage"
          #     }
          #     source = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #   }
          #   "databricks-worker-to-sql" = {
          #     priority  = 106
          #     access    = "Allow"
          #     direction = "Outbound"
          #     protocol  = "tcp"
          #     destination = {
          #       ports  = ["3306"]
          #       asgs   = []
          #       prefix = "Sql"
          #     }
          #     source = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #   }
          #   "databricks-worker-within-cluster" = {
          #     priority  = 100
          #     access    = "Allow"
          #     direction = "Inbound"
          #     protocol  = "tcp"
          #     destination = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #     source = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #   }
          #   "databricks-worker-within-cluster" = {
          #     priority  = 108
          #     access    = "Allow"
          #     direction = "Outbound"
          #     protocol  = "tcp"
          #     destination = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #     source = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #   }
          #   "databricks-worker-to-event-hubs" = {
          #     priority  = 110
          #     access    = "Allow"
          #     direction = "Outbound"
          #     protocol  = "tcp"
          #     destination = {
          #       ports  = ["9093"]
          #       asgs   = []
          #       prefix = "Eventhubs"
          #     }
          #     source = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #   }
        }
      }
      # Network security group definition for the "db-sub-2-private" subnet.
      # `rules` is currently an EMPTY map: every rule below is commented out.
      # NOTE(review): presumably the rules Azure Databricks needs are expected
      # to be supplied by the platform through the subnet delegation rather
      # than by this map — confirm, since the cluster fails with
      # INSTANCE_UNREACHABLE.
      # NOTE(review): the commented-out rules use the key
      # "databricks-worker-within-cluster" twice (inbound, priority 100 and
      # outbound, priority 108); give them distinct keys before re-enabling.
      databricks-private-nsg = {
        subnets = ["db-sub-2-private"]
        rules = {
          #   "databricks-worker-to-webapp" = {
          #     priority  = 101
          #     access    = "Allow"
          #     direction = "Outbound"
          #     protocol  = "tcp"
          #     destination = {
          #       ports  = ["443"]
          #       asgs   = []
          #       prefix = "AzureDatabricks"
          #     }
          #     source = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #   }
          #   "databricks-worker-to-storage" = {
          #     priority  = 104
          #     access    = "Allow"
          #     direction = "Outbound"
          #     protocol  = "tcp"
          #     destination = {
          #       ports  = ["443"]
          #       asgs   = []
          #       prefix = "Storage"
          #     }
          #     source = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #   }
          #   "databricks-worker-to-sql" = {
          #     priority  = 106
          #     access    = "Allow"
          #     direction = "Outbound"
          #     protocol  = "tcp"
          #     destination = {
          #       ports  = ["3306"]
          #       asgs   = []
          #       prefix = "Sql"
          #     }
          #     source = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #   }
          #   "databricks-worker-within-cluster" = {
          #     priority  = 100
          #     access    = "Allow"
          #     direction = "Inbound"
          #     protocol  = "tcp"
          #     destination = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #     source = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #   }
          #   "databricks-worker-within-cluster" = {
          #     priority  = 108
          #     access    = "Allow"
          #     direction = "Outbound"
          #     protocol  = "tcp"
          #     destination = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #     source = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #   }
          #   "databricks-worker-to-event-hubs" = {
          #     priority  = 110
          #     access    = "Allow"
          #     direction = "Outbound"
          #     protocol  = "tcp"
          #     destination = {
          #       ports  = ["9093"]
          #       asgs   = []
          #       prefix = "Eventhubs"
          #     }
          #     source = {
          #       ports  = ["*"]
          #       asgs   = []
          #       prefix = "VirtualNetwork"
          #     }
          #   }
        }
      }
    }
  }
}

 module "virtualnetwork" {
  # Virtual network (subnets, ASGs, NSGs) built from local.vnet by a
  # registry module pinned to the 7.x series.
  source  = "contoso.com/virtual-network/azurerm"
  version = "~> 7.0"

  # Instantiated once, under the key "centralus", when local.vnet.enable is
  # true; otherwise no instance is created.
  for_each = local.vnet.enable ? { centralus = "Central US" } : {}

  providers = {
    azurerm     = azurerm
    azurerm.hub = azurerm.hub
    random      = random
  }

  # Hub-side resources the module is pointed at.
  hub_resource_group_name       = var.project.hub.resourcegroup.name
  hub_virtual_hub_name          = var.project.hub.virtualhub.name
  hub_ddos_protection_plan_name = var.project.hub.ddosprotectionplan.name

  # Virtual network identity and address space; the CIDR list is looked up
  # under the key "<region key>-default".
  resource_group_name         = module.resourcegroup.resource_group.name
  virtual_network_name        = var.project.name
  virtual_network_environment = var.project.environment.name
  virtual_network_location    = each.value
  virtual_network_cidrs       = var.project.cidrs[format("%s-default", each.key)]
  virtual_network_tags        = module.resourcegroup.resource_group.tags

  # Subnet, ASG and NSG definitions come straight from local.vnet.
  virtual_network_subnets         = local.vnet.subnets
  application_security_groups     = local.vnet.asgs
  virtual_network_security_groups = local.vnet.nsgs
}

标签: azure, terraform, databricks, azure-databricks, terraform-provider-azure

解决方案


推荐阅读