python-3.x - tf.keras.optimizers.Adam 与 Tensorflow 2.0.beta 中的 tf.estimator 模型正在崩溃
问题描述
我正在使用Tensorflow 2.0.beta
with Python 3.6.6
on Mac OS
(每晚:tf-nightly-2.0-preview
2.0.0.dev20190721
但我从未设法让它与中的 compat 模块一起使用Tensorflow 2.0
)。
我正在准备将tf.estimator
模型从Tensorflow 1.12
(完全工作)迁移到Tensorflow 2.0
. 这是代码:
# estimator model
def baseline_estimator_model(features, labels, mode, params):
"""
Model function for Estimator
"""
print('model based on keras layer but return an estimator model')
# gettings the bulding blocks
model = keras_building_blocks(params['dim_input'], params['num_classes'])
dense_inpout = features['dense_input']
# Logits layer
if mode == tf.estimator.ModeKeys.TRAIN:
logits = model(dense_inpout, training=True)
else:
logits = model(dense_inpout, training=False)
# Compute predictions
probabilities = tf.nn.softmax(logits)
classes = tf.argmax(input=probabilities, axis=1, )
# made prediction
predictions = {
'classes': classes,
'probabilities': probabilities,
}
# to be tested
predictions_output = tf.estimator.export.PredictOutput(predictions)
# Provide an estimator spec for `ModeKeys.PREDICT`
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode,
predictions=predictions,
export_outputs={tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY: predictions_output})
# Compute loss for both TRAIN and EVAL modes
# old -> loss = tf.compat.v1.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)(labels, logits)
# Generate necessary evaluation metrics
# old -> accuracy = tf.compat.v1.metrics.accuracy(labels=tf.argmax(input=labels, axis=1), predictions=classes, name='accuracy')
accuracy = tf.keras.metrics.CategoricalAccuracy()
accuracy.update_state(labels, logits)
eval_metrics = {'accuracy': accuracy}
tf.summary.scalar('accuracy', accuracy.result())
# Provide an estimator spec for `ModeKeys.EVAL`
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(mode=mode,
loss=loss,
eval_metric_ops=eval_metrics)
# Provide an estimator spec for `ModeKeys.TRAIN`
if mode == tf.estimator.ModeKeys.TRAIN:
# old but working -> optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001, beta1=0.9)
# crashing
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, epsilon=1e-07)
# old -> train_op = optimizer.minimize(loss, tf.compat.v1.train.get_or_create_global_step())
train_op = optimizer.minimize(loss,var_list=model.weights)
return tf.estimator.EstimatorSpec(mode=mode,
loss=loss,
train_op=train_op)
预测=预测,损失=损失,train_op=train_op,export_outputs=predictions_output)
如果我保留 compat.v1 模块,它正在工作:
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001, beta1=0.9)
如果我尝试使用没有 compat.v1 的东西,它会崩溃:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9,epsilon=1e-07)
出现以下错误(我暂时在本地运行代码,而不是在GCP
):
I0721 17:33:04.812453 4526515648 estimator.py:209] Using config: {'_model_dir': 'results/Models/Mnist/tf_1_12/estimator/v3/ckpt/', '_tf_random_seed': None, '_save_summary_steps': 10, '_save_checkpoints_steps': 10, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
rewrite_options {
meta_optimizer_iterations: ONE
}
}
, '_keep_checkpoint_max': 3, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 50, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1c37b11b70>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
I0721 17:33:04.815697 4526515648 estimator_training.py:186] Not using Distribute Coordinator.
I0721 17:33:04.817899 4526515648 training.py:612] Running training and evaluation locally (non-distributed).
I0721 17:33:04.818665 4526515648 training.py:700] Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps 10 or save_checkpoints_secs None.
I0721 17:33:04.834385 4526515648 model.py:211] input_dataset_fn: TRAIN, train
using keras layer and estimator (recommended way)
exporter <tensorflow_estimator.python.estimator.exporter.LatestExporter object at 0x1c37b115f8>
I0721 17:33:05.117963 4526515648 estimator.py:1145] Calling model_fn.
model based on keras layer but return an estimator model
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<timed exec> in <module>
~/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/src/model_mnist_2_0_v1/trainer/model.py in train_and_evaluate(FLAGS, use_keras)
589 exporters=exporter)
590
--> 591 tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
592
593 def train_and_evaluate_old(FLAGS, use_keras):
~/anaconda-release/conda-env/env_gcp_dl_2_0_nightly/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/training.py in train_and_evaluate(estimator, train_spec, eval_spec)
471 '(with task id 0). Given task id {}'.format(config.task_id))
472
--> 473 return executor.run()
474
475
~/anaconda-release/conda-env/env_gcp_dl_2_0_nightly/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/training.py in run(self)
611 config.task_type != run_config_lib.TaskType.EVALUATOR):
612 logging.info('Running training and evaluation locally (non-distributed).')
--> 613 return self.run_local()
614
615 # Distributed case.
~/anaconda-release/conda-env/env_gcp_dl_2_0_nightly/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/training.py in run_local(self)
712 max_steps=self._train_spec.max_steps,
713 hooks=train_hooks,
--> 714 saving_listeners=saving_listeners)
715
716 eval_result = listener_for_eval.eval_result or _EvalResult(
~/anaconda-release/conda-env/env_gcp_dl_2_0_nightly/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
365
366 saving_listeners = _check_listeners_type(saving_listeners)
--> 367 loss = self._train_model(input_fn, hooks, saving_listeners)
368 logging.info('Loss for final step: %s.', loss)
369 return self
~/anaconda-release/conda-env/env_gcp_dl_2_0_nightly/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
1156 return self._train_model_distributed(input_fn, hooks, saving_listeners)
1157 else:
-> 1158 return self._train_model_default(input_fn, hooks, saving_listeners)
1159
1160 def _train_model_default(self, input_fn, hooks, saving_listeners):
~/anaconda-release/conda-env/env_gcp_dl_2_0_nightly/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py in _train_model_default(self, input_fn, hooks, saving_listeners)
1186 worker_hooks.extend(input_hooks)
1187 estimator_spec = self._call_model_fn(
-> 1188 features, labels, ModeKeys.TRAIN, self.config)
1189 global_step_tensor = training_util.get_global_step(g)
1190 return self._train_with_estimator_spec(estimator_spec, worker_hooks,
~/anaconda-release/conda-env/env_gcp_dl_2_0_nightly/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)
1144
1145 logging.info('Calling model_fn.')
-> 1146 model_fn_results = self._model_fn(features=features, **kwargs)
1147 logging.info('Done calling model_fn.')
1148
~/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/src/model_mnist_2_0_v1/trainer/model.py in baseline_estimator_model(features, labels, mode, params)
442 #train_op = optimizer.minimize(loss, tf.compat.v1.train.get_or_create_global_step())
443 #train_op = optimizer.minimize(loss, tf.train.get_or_create_global_step())
--> 444 train_op = optimizer.minimize(loss,var_list=model.weights)
445
446 print('step 8')
~/anaconda-release/conda-env/env_gcp_dl_2_0_nightly/lib/python3.6/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py in minimize(self, loss, var_list, grad_loss, name)
315 """
316 grads_and_vars = self._compute_gradients(
--> 317 loss, var_list=var_list, grad_loss=grad_loss)
318
319 return self.apply_gradients(grads_and_vars, name=name)
~/anaconda-release/conda-env/env_gcp_dl_2_0_nightly/lib/python3.6/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py in _compute_gradients(self, loss, var_list, grad_loss)
349 if not callable(var_list):
350 tape.watch(var_list)
--> 351 loss_value = loss()
352 if callable(var_list):
353 var_list = var_list()
TypeError: 'Tensor' object is not callable
知道如何解决吗?自Tensorflow 2.0 alpha
.
我也在寻找 tf.estimator 的完整工作示例Tensorflow 2.0
。我也有导出模型的问题。在他们的官方文档中,Tensorflow 2.0
他们只在他们的示例中使用compat.v1
并且不导出模型。GCP 的所有关于 tf.estimator 的在线课程都使用旧版本的 Tensorflow (1.12 - 1.14)。
解决方案
推荐阅读
- python - 匹配两个熊猫系列:如何从另一个系列中的一个系列中找到一个字符串元素,然后创建一个新列
- javascript - 尝试进行 api 调用但获取状态:0
- azure - 在构建中使用 Azure 构建管道日志
- applescript - 这是类型不匹配吗
- python - 如何解决此问题不支持嵌套重命名器
- javascript - 应用上下文 TypeError 时出错:无法读取 null 错误的属性“getReferences”
- mysql - 如何选择对应行中前 5 个列的五个值?
- apache-kafka - MongoDB Kafka 源连接器抛出 java.lang.IllegalStateException: Queue full when using copy.existing: true
- cookies - ngx-cookie-service 是否是对 Angular Universal 应用程序进行身份验证的完整方法
- python - 如何在 Python 和 OpenCV 中检测一系列闪烁的灯光