#python #keras #lstm #encoder-decoder
#питон #keras #lstm #кодер-декодер
Вопрос:
Я создаю кодер-декодер для преобразования ASR в предложения. Для этого у меня есть X
y
данные и, которые я разделил. Вот как y
это выглядит:
y[0:3] gt;gt; array([[ 1, 13, 14, 15, 5, 16, 17, 2, 0, 0, 0, 0, 0], [ 1, 18, 6, 19, 2, 0, 0, 0, 0, 0, 0, 0, 0], [ 1, 20, 21, 7, 22, 23, 2, 0, 0, 0, 0, 0, 0]], dtype=int32)
В приведенном выше выводе 1
для "_START"
и 2
для "_STOP_"
; затем я сделал отступ, чтобы получить фиксированную длину.
Вот как выглядят данные о поездах:
X_train.shape, y_train.shape gt;gt;((18, 128, 335), (18, 13))
Вот как выглядят данные проверки:
X_val.shape, y_val.shape ((2, 128, 335), (2, 13))
Чтобы настроить форму ввода, у меня есть эти:
num_encoder_tokens, num_decoder_tokens ((128, 335), 13)
Вот модель, которую я пытаюсь запустить:
# Let's set up encoder encoder_inputs = Input(shape=num_encoder_tokens); encoder = LSTM(latent_dim, return_state=True); encoder_outputs, state_h, state_c = encoder(encoder_inputs); encoder_states = [state_h, state_c]; # Let's set up the decoder. decoder_inputs = Input(shape=(None, num_decoder_tokens)); decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True); decoder_outputs, _, _ = decoder_lstm(decoder_inputs,initial_state=encoder_states); decoder_dense = Dense(num_decoder_tokens, activation='softmax'); decoder_outputs = decoder_dense(decoder_outputs); # Define the model model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
Вот архитектура:
Model: "model" __________________________________________________________________________________________________ Layer (type) Output Shape Param # Connected to ================================================================================================== input_1 (InputLayer) [(None, 128, 335)] 0 __________________________________________________________________________________________________ input_2 (InputLayer) [(None, None, 13)] 0 __________________________________________________________________________________________________ lstm (LSTM) [(None, 64), (None, 102400 input_1[0][0] __________________________________________________________________________________________________ lstm_1 (LSTM) [(None, None, 64), ( 19968 input_2[0][0] lstm[0][1] lstm[0][2] __________________________________________________________________________________________________ dense (Dense) (None, None, 13) 845 lstm_1[0][0] ================================================================================================== Total params: 123,213 Trainable params: 123,213 Non-trainable params: 0
Code to fit is:
H = model.fit([X_train, y_train], y_train, validation_data=(X_val, y_val), batch_size=32, callbacks=cb, epochs=5, verbose=1);
I get this error:
Epoch 1/5 WARNING:tensorflow:Model was constructed with shape (None, None, 13) for input KerasTensor(type_spec=TensorSpec(shape=(None, None, 13), dtype=tf.float32, name='input_2'), name='input_2', description="created by layer 'input_2'"), but it was called on an input with incompatible shape (None, 13). --------------------------------------------------------------------------- ValueError Traceback (most recent call last) lt;ipython-input-63-4b36fc438462gt; in lt;modulegt; 3 H = model.fit([X_train, y_train], y_train[:,1:], 4 validation_data=(X_val, y_val), batch_size=32, ----gt; 5 callbacks=cb, epochs=5, verbose=1); ~/.local/lib/python3.7/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing) 1156 _r=1): 1157 callbacks.on_train_batch_begin(step) -gt; 1158 tmp_logs = self.train_function(iterator) 1159 if data_handler.should_sync: 1160 context.async_wait() ~/.local/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds) 887 888 with OptionalXlaContext(self._jit_compile): --gt; 889 result = self._call(*args, **kwds) 890 891 new_tracing_count = self.experimental_get_tracing_count() ~/.local/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds) 931 # This is the first call of __call__, so we have to initialize. 932 initializers = [] --gt; 933 self._initialize(args, kwds, add_initializers_to=initializers) 934 finally: 935 # At this point we know that the initialization is complete (or less ~/.local/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to) 762 self._concrete_stateful_fn = ( 763 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access --gt; 764 *args, **kwds)) 765 766 def invalid_creator_scope(*unused_args, **unused_kwds): ~/.local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs) 3048 args, kwargs = None, None 3049 with self._lock: -gt; 3050 graph_function, _ = self._maybe_define_function(args, kwargs) 3051 return graph_function 3052 ~/.local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs) 3442 3443 self._function_cache.missed.add(call_context_key) -gt; 3444 graph_function = self._create_graph_function(args, kwargs) 3445 self._function_cache.primary[cache_key] = graph_function 3446 ~/.local/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes) 3287 arg_names=arg_names, 3288 override_flat_arg_shapes=override_flat_arg_shapes, -gt; 3289 capture_by_value=self._capture_by_value), 3290 self._function_attributes, 3291 function_spec=self.function_spec, ~/.local/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes) 997 _, original_func = tf_decorator.unwrap(python_func) 998 --gt; 999 func_outputs = python_func(*func_args, **func_kwargs) 1000 1001 # invariant: `func_outputs` contains only Tensors, CompositeTensors, ~/.local/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds) 670 # the function a weak reference to itself to avoid a reference cycle. 671 with OptionalXlaContext(compile_with_xla): --gt; 672 out = weak_wrapped_fn().__wrapped__(*args, **kwds) 673 return out 674 ~/.local/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs) 984 except Exception as e: # pylint:disable=broad-except 985 if hasattr(e, "ag_error_metadata"): --gt; 986 raise e.ag_error_metadata.to_exception(e) 987 else: 988 raise ValueError: in user code: /home/fogang/.local/lib/python3.7/site-packages/keras/engine/training.py:830 train_function * return step_function(self, iterator) /home/fogang/.local/lib/python3.7/site-packages/keras/engine/training.py:813 run_step * outputs = model.train_step(data) /home/fogang/.local/lib/python3.7/site-packages/keras/engine/training.py:770 train_step * y_pred = self(x, training=True) /home/fogang/.local/lib/python3.7/site-packages/keras/engine/base_layer.py:989 __call__ * input_spec.assert_input_compatibility(self.input_spec, inputs, self.name) /home/fogang/.local/lib/python3.7/site-packages/keras/engine/functional.py:416 call * inputs, training=training, mask=mask) /home/fogang/.local/lib/python3.7/site-packages/keras/engine/functional.py:551 _run_internal_graph * outputs = node.layer(*args, **kwargs) /home/fogang/.local/lib/python3.7/site-packages/keras/layers/recurrent.py:717 __call__ * return super(RNN, self).__call__(inputs, **kwargs) /home/fogang/.local/lib/python3.7/site-packages/keras/engine/base_layer.py:989 __call__ * input_spec.assert_input_compatibility(self.input_spec, inputs, self.name) /home/fogang/.local/lib/python3.7/site-packages/keras/engine/input_spec.py:212 assert_input_compatibility * raise ValueError('Input ' str(input_index) ' of layer ' ValueError: Input 0 of layer lstm_1 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 13)
Любое предложение, чтобы исправить это.