首页 > 解决方案 > 使用 Amazon SageMaker 部署模型时出现问题(ValueError:Estimator 未与训练作业关联)

问题描述

我是 AWS 新手,正在尝试本教程 ( https://aws.amazon.com/getting-started/hands-on/build-train-deploy-machine-learning-model-sagemaker/ ):使用 Amazon SageMaker 构建、训练和部署机器学习模型。

当我尝试部署模型时,我不断收到此错误 -> ValueError: Estimator is not associated with a training job。

下面是代码。对此有任何帮助将不胜感激。

# import libraries
import boto3, re, sys, math, json, os, sagemaker, urllib.request
from sagemaker import get_execution_role
import numpy as np                                
import pandas as pd                               
import matplotlib.pyplot as plt                   
from IPython.display import Image                 
from IPython.display import display               
from time import gmtime, strftime                 
from sagemaker.predictor import csv_serializer   

# Resolve the IAM execution role and the S3 key prefix used by this demo.
role = get_execution_role()
prefix = 'sagemaker/DEMO-xgboost-dm'

# Each supported region has its own XGBoost container image URI.
containers = {
    'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest',
    'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest',
    'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',
    'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/xgboost:latest',
}

# Region of the current boto3 session (i.e. of the notebook instance).
_boto_session = boto3.session.Session()
my_region = _boto_session.region_name

# NOTE: containers[my_region] raises KeyError for any region that is not one
# of the four keys above.
_region_notice = "Success - the MySageMakerInstance is in the " + my_region + " region. You will use the "
print(_region_notice + containers[my_region] + " container for your SageMaker endpoint.")


# <--- CHANGE THIS VARIABLE TO A UNIQUE NAME FOR YOUR BUCKET
bucket_name = 'xxxxxxx'
s3 = boto3.resource('s3')

# Create the bucket. For us-east-1 no CreateBucketConfiguration is passed;
# every other region passes its own name as the LocationConstraint.
# Failures are reported but do not stop the script.
try:
    _create_args = {'Bucket': bucket_name}
    if my_region != 'us-east-1':
        _create_args['CreateBucketConfiguration'] = {'LocationConstraint': my_region}
    s3.create_bucket(**_create_args)
    print('S3 bucket created successfully')
except Exception as err:
    print('S3 error: ', err)

# Download the cleaned bank-marketing dataset next to the notebook.
_data_url = "https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv"
_local_csv = "bank_clean.csv"
try:
    urllib.request.urlretrieve(_data_url, _local_csv)
    print('Success: downloaded bank_clean.csv.')
except Exception as err:
    print('Data load error: ', err)

# Load the CSV into a DataFrame, using the first column as the index.
# NOTE: if this fails, model_data stays undefined and later statements that
# use it will raise NameError.
try:
    model_data = pd.read_csv('./bank_clean.csv', index_col=0)
    print('Success: Data loaded into dataframe.')
except Exception as err:
    print('Data load error: ', err)


# Shuffle every row with a fixed seed so the split is reproducible, then cut
# at the 70% mark: the first 70% of rows become the training set and the
# remainder the test set.
# Positional slicing replaces np.split(), which on a DataFrame goes through
# the deprecated DataFrame.swapaxes and emits a FutureWarning on pandas >= 2.1.
_shuffled = model_data.sample(frac=1, random_state=1729)
_split_at = int(0.7 * len(model_data))
train_data = _shuffled.iloc[:_split_at]
test_data = _shuffled.iloc[_split_at:]
print(train_data.shape, test_data.shape)


# Write the training set as a headerless, index-free CSV with the y_yes column
# first, followed by all remaining columns except y_no and y_yes.
_train_features = train_data.drop(['y_no', 'y_yes'], axis=1)
_train_frame = pd.concat([train_data['y_yes'], _train_features], axis=1)
_train_frame.to_csv('train.csv', index=False, header=False)

# S3 object keys always use '/' as the separator, so build the key with string
# formatting instead of os.path.join (which would use the OS path separator).
_train_key = '{}/train/train.csv'.format(prefix)
boto3.Session().resource('s3').Bucket(bucket_name).Object(_train_key).upload_file('train.csv')

# Point the training channel at the uploaded <prefix>/train location.
s3_input_train = sagemaker.inputs.TrainingInput(
    s3_data='s3://{}/{}/train'.format(bucket_name, prefix),
    content_type='csv',
)


# Create the SageMaker session and an Estimator that uses this region's
# XGBoost container; output_path points at <prefix>/output in the bucket.
sess = sagemaker.Session()
_output_location = 's3://{}/{}/output'.format(bucket_name, prefix)
xgb = sagemaker.estimator.Estimator(
    containers[my_region],
    role,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path=_output_location,
    sagemaker_session=sess,
)

# Hyperparameters passed to the estimator before training.
_xgb_hyperparameters = {
    'max_depth': 5,
    'eta': 0.2,
    'gamma': 4,
    'min_child_weight': 6,
    'subsample': 0.8,
    'silent': 0,
    'objective': 'binary:logistic',
    'num_round': 100,
}
xgb.set_hyperparameters(**_xgb_hyperparameters)


# Run training with the uploaded data bound to the 'train' channel.
_train_channels = {'train': s3_input_train}
xgb.fit(_train_channels)


# Deploy the estimator to an endpoint. deploy() raises
# "ValueError: Estimator is not associated with a training job" when
# xgb.latest_training_job is None; presumably that means fit() above has to
# have run successfully on this same xgb object first (re-creating the
# Estimator afterwards would leave it unset) - TODO confirm.
xgb_predictor = xgb.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)

最后一行代码产生了以下错误:


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-14-ecdb827f43a2> in <module>
----> 1 xgb_predictor = xgb.deploy(initial_instance_count=1,instance_type='ml.m4.xlarge')

~/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/estimator.py in deploy(self, initial_instance_count, instance_type, serializer, deserializer, accelerator_type, endpoint_name, use_compiled_model, wait, model_name, kms_key, data_capture_config, tags, **kwargs)
    900         """
    901         removed_kwargs("update_endpoint", kwargs)
--> 902         self._ensure_latest_training_job()
    903         self._ensure_base_job_name()
    904         default_name = name_from_base(self.base_job_name)

~/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/estimator.py in _ensure_latest_training_job(self, error_message)
   1251         """Placeholder docstring"""
   1252         if self.latest_training_job is None:
-> 1253             raise ValueError(error_message)
   1254 
   1255     delete_endpoint = removed_function("delete_endpoint")

ValueError: Estimator is not associated with a training job

标签: python, amazon-web-services, jupyter-notebook, data-science, amazon-sagemaker

解决方案


推荐阅读