#python #tensorflow #keras #google-cloud-platform #tpu
#python #tensorflow #keras #google-cloud-platform #tpu
Вопрос:
Я пытаюсь инициализировать resnet50 в качестве основы для модели в TF 1.15, и модель запускается в Google TPU V2. Мой код такой:
backbone_model=tf.keras.applications.ResNet50(include_top=False, weights='imagenet',pooling=None)
Я получаю следующие ошибки.
<pre><code>
E1014 09:57:09.458413 140497105635136 tpu.py:425] Operation of type Placeholder (input_1) is not supported on the TPU. Execution will fail if this op is used in the graph.
app.run(main)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/absl/app.py", line 300, in run
_run_main(main, args)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "/home/usman/nas/tpu.py", line 232, in main
max_steps=FLAGS.train_steps)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3035, in train
rendezvous.raise_errors()
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/tpu/error_handling.py", line 136, in raise_errors
six.reraise(typ, value, traceback)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/six.py", line 703, in reraise
raise value
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3030, in train
saving_listeners=saving_listeners)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 370, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1161, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1191, in _train_model_default
features, labels, ModeKeys.TRAIN, self.config)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 2857, in _call_model_fn
config)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1149, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3159, in _model_fn
_train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3604, in _train_on_tpu_system
device_assignment=ctx.device_assignment)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/tpu/tpu.py", line 1277, in split_compile_and_shard
name=name)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/tpu/tpu.py", line 992, in split_compile_and_replicate
outputs = computation(*computation_inputs)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3589, in multi_tpu_train_steps_on_single_shard
inputs=[0, _INITIAL_LOSS])
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/tpu/training_loop.py", line 178, in while_loop
condition_wrapper, body_wrapper, inputs, name="", parallel_iterations=1)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/ops/control_flow_ops.py", line 2753, in while_loop
return_same_structure)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/ops/control_flow_ops.py", line 2245, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/ops/control_flow_ops.py", line 2170, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/tpu/training_loop.py", line 121, in body_wrapper
outputs = body(*(inputs + dequeue_ops))
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 3588, in <lambda>
lambda i, loss: [i + 1, single_tpu_train_step(i)],
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 1715, in train_step
self._call_model_fn(features, labels))
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/tpu/tpu_estimator.py", line 1994, in _call_model_fn
estimator_spec = self._model_fn(features=features, **kwargs)
File "/home/usman/nas/tpu.py", line 120, in model
pooling=None)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/keras/applications/__init__.py", line 49, in wrapper
return base_fun(*args, **kwargs)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/keras/applications/resnet.py", line 33, in ResNet50
return resnet.ResNet50(*args, **kwargs)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/keras_applications/resnet_common.py", line 435, in ResNet50
**kwargs)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/keras_applications/resnet_common.py", line 411, in ResNet
model.load_weights(weights_path)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 182, in load_weights
return super(Model, self).load_weights(filepath, by_name)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/network.py", line 1373, in load_weights
saving.load_weights_from_hdf5_group(f, self.layers)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/hdf5_format.py", line 693, in load_weights_from_hdf5_group
K.batch_set_value(weight_value_tuples)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/keras/backend.py", line 3259, in batch_set_value
get_session().run(assign_ops, feed_dict=feed_dict)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/keras/backend.py", line 486, in get_session
_initialize_variables(session)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/keras/backend.py", line 903, in _initialize_variables
[variables_module.is_variable_initialized(v) for v in candidate_vars])
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 956, in run
run_metadata_ptr)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 1165, in _run
self._graph, fetches, feed_dict_tensor, feed_handles=feed_handles)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 488, in __init__
self._assert_fetchable(graph, fetch.op)
File "/home/usman/anaconda3/envs/py37/lib/python3.7/site-packages/tensorflow_core/python/client/session.py", line 505, in _assert_fetchable
% op.name)
tensorflow.python.framework.errors_impl.InaccessibleTensorError: Operation 'VarIsInitializedOp' has been marked as not fetchable. Typically this happens when it is defined in another function or code block. Use return values,explicit Python locals or TensorFlow collections to access it.
</code></pre>
Из того, что мне удалось найти, ближе всего к моей ошибке следующее объяснение: невозможно получить (fetch) результат операции, созданной внутри тела цикла while, потому что тело может выполняться 0 или более раз в зависимости от условия цикла. Чтобы получить значение из цикла, нужно вернуть его из функции body (как одну из переменных цикла); его конечное значение после всех итераций будет возвращено из tf.while_loop(). Но в моём случае это происходит внутри кода самого TF, а не в моём коде: я выполняю всего одну строку для инициализации модели.
Комментарии:
1. Добро пожаловать в SO. Пожалуйста, опубликуйте полную обратную трассировку.
2. Да, я отредактировал вопрос.
3. Вот список операций TensorFlow, доступных на Cloud TPU: cloud.google.com/tpu/docs/tensorflow-ops
4. Да, я проверил: операция, на которую указывает ошибка, доступна на TPU как операция графа.