#keras #deep-learning #nlp #attention-model #seq2seq
Question:
I am following the tutorial here: https://github.com/Pawandeep-prog/keras-seq2seq-chatbot-with-attention/blob/master/seq2seq-chatbot-keras-with-attention.ipynb, but I am getting some errors.
Here is my code:
class AttentionLayer(Layer):
    """
    This class implements Bahdanau attention (https://arxiv.org/pdf/1409.0473.pdf).
    There are three sets of weights introduced: W_a, U_a, and V_a.
    """
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        # Create trainable weight variables for this layer.
        self.W_a = self.add_weight(name='W_a',
                                   shape=tf.TensorShape((input_shape[0][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        self.U_a = self.add_weight(name='U_a',
                                   shape=tf.TensorShape((input_shape[1][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        self.V_a = self.add_weight(name='V_a',
                                   shape=tf.TensorShape((input_shape[0][2], 1)),
                                   initializer='uniform',
                                   trainable=True)
        super(AttentionLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, inputs, verbose=False):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batch_size * 1 * de_in_dim)
            states: (batch_size * 1 * de_latent_dim)
            """
            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            """ Some parameters required for shaping tensors """
            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]

            """ Computing S.Wa where S=[s0, s1, ..., si] """
            # <= batch_size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_out_seq, self.W_a)

            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)

            """ tanh(S.Wa + hj.Ua) """
            # <= batch_size * en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            if verbose:
                print('Ws+Uh>', Ws_plus_Uh.shape)

            """ softmax(va.tanh(S.Wa + hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)
            if verbose:
                print('ei>', e_i.shape)
            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """
            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg
            # <= batch_size, hidden_size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        fake_state_c = K.sum(encoder_out_seq, axis=1)
        fake_state_e = K.sum(encoder_out_seq, axis=2)  # <= (batch_size, en_seq_len)

        """ Computing energy outputs """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step, decoder_out_seq, [fake_state_e],
        )

        """ Computing context vectors """
        last_out, c_outputs, _ = K.rnn(
            context_step, e_outputs, [fake_state_c],
        )
        return c_outputs, e_outputs

    def compute_output_shape(self, input_shape):
        """ Outputs produced by the layer """
        return [
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[1][2])),
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
        ]
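To check that the layer itself behaves, here is a small sanity check on concrete (eager) tensors, assuming the imports the class needs anyway (import tensorflow as tf, from tensorflow.keras.layers import Layer, from tensorflow.keras import backend as K); the batch/sequence sizes are placeholders:

# Sanity check on concrete eager tensors -- shapes only, values are random.
enc = tf.random.normal((4, 10, 512))   # (batch, en_seq_len, en_hidden)
dec = tf.random.normal((4, 7, 512))    # (batch, de_seq_len, de_hidden)
context, energies = AttentionLayer()([enc, dec])
print(context.shape)    # expected (4, 7, 512): one context vector per decoder step
print(energies.shape)   # expected (4, 7, 10): attention weights over encoder steps

If this eager call succeeds, the failure would be specific to the symbolic KerasTensors produced while building the functional model.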
Code for the encoder-decoder model:
encoder_inputs = Input(shape=(None,))
encoder_embedding = embedding_layer(encoder_inputs)
encoder_LSTM = Bidirectional(LSTM(256, return_sequences=True, return_state=True, dropout=0.05))
encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder_LSTM(encoder_embedding)
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
encoder_states = [state_h, state_c]

decoder_inputs = Input(shape=(None,))
decoder_embedding = embedding_layer_out(decoder_inputs)
decoder_LSTM = LSTM(512, return_state=True, return_sequences=True, dropout=0.05)
decoder_outputs, _, _ = decoder_LSTM(decoder_embedding, initial_state=encoder_states)

# attention
attn_layer = AttentionLayer()
#attn_op, attn_state = attn_layer([encoder_outputs, decoder_outputs])
attn_op, attn_state = attn_layer([encoder_outputs, decoder_outputs])
decoder_concat_input = Concatenate(axis=-1)([context, decoder_LSTM])

dec_dense = Dense(VOCABULARY_SIZE, activation='softmax')
final_output = dec_dense(decoder_concat_input)

model = Model([encoder_inputs, decoder_inputs], final_output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
model.fit([encoder_input_data, decoder_input_data], decoder_target_data, validation_split=0.2, batch_size=124, epochs=600)  # 250, 300
model.save('model.h5')
model.save_weights('chatbot_weights.h5')
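For reference, the state sizes line up: each direction of the 256-unit bidirectional LSTM returns 256-dim h and c states, so the concatenated states are 512-dim, which matches the 512-unit decoder's initial_state. A standalone check (placeholder vocab/embedding sizes, not my real ones):

import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, LSTM, Bidirectional, Concatenate

# Forward/backward states are 256-dim each, so the concatenation is 512-dim.
x = Input(shape=(None,))
e = Embedding(1000, 64)(x)
_, fh, fc, bh, bc = Bidirectional(LSTM(256, return_sequences=True, return_state=True))(e)
print(Concatenate()([fh, bh]).shape)  # (None, 512) -> valid initial_state for the 512-unit decoder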
Code for the inference models:
def make_inference_models():
    encoder_model = Model(encoder_inputs, [encoder_outputs, encoder_states])
    decoder_state_input_h = Input(shape=(512,))
    decoder_state_input_c = Input(shape=(512,))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_outputs, state_h, state_c = decoder_LSTM(decoder_embedding, initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_model = Model([decoder_inputs, decoder_states_inputs], [decoder_outputs] + decoder_states)
    return encoder_model, decoder_model
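The generation loop below also relies on setup I haven't pasted. Roughly, it looks like this (tokenized_input and the '<START>' key are placeholders for my real preprocessing, and I assume predict flattens the nested [encoder_outputs, [h, c]] output):

import numpy as np

enc_model, dec_model = make_inference_models()
enc_op, h, c = enc_model.predict(tokenized_input)   # tokenized_input: placeholder
states_values = [h, c]

empty_target_seq = np.zeros((1, 1))
empty_target_seq[0, 0] = output_word_dict['<START>']  # assumes a '<START>' token
stop_condition = False
decoded_translation = ''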
And finally, to generate the response:
try:
    while not stop_condition:
        dec_outputs, h, c = dec_model.predict([empty_target_seq] + states_values)
        ## attention
        attn_op, attn_state = attn_layer([enc_op, dec_outputs])
        decoder_concat_input = Concatenate(axis=-1)([dec_outputs, attn_op])
        decoder_concat_input = dec_dense(decoder_concat_input)

        sampled_word_index = np.argmax(decoder_concat_input[0, -1, :])
        #sampled_word_index = np.argmax(dec_outputs[0, -1, :])
        sampled_word = None
        for word, index in output_word_dict.items():
            if sampled_word_index == index:
                decoded_translation += ' {}'.format(word)  #remove word format #decoded_translation.append(word), decoded_translation = []
                sampled_word = word

        if sampled_word == '<END>' or len(decoded_translation.split()) > max_output_length:
            stop_condition = True

        empty_target_seq = np.zeros((1, 1))
        empty_target_seq[0, 0] = sampled_word_index
        states_values = [h, c]

    print("Bot:" + decoded_translation.replace('<END>', ''))  # clean the decoded translation so that it accepts contractions
    print()
except:
    print("Bot: Sorry, I don't understand!")
    print()
Please help me out, I don't understand the error I'm getting!
ERROR:
TypeError: 'KerasTensor' object cannot be interpreted as an integer
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-2-da6343914597> in <module>()
336 attn_layer = AttentionLayer()
337 #attn_op, attn_state = attn_layer([encoder_outputs, decoder_outputs])
--> 338 attn_op, attn_state = attn_layer([encoder_outputs, decoder_outputs])
339 decoder_concat_input = Concatenate(axis=-1)([context, decoder_LSTM])
340
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
205 try:
--> 206 return target(*args, **kwargs)
207 except (TypeError, ValueError):
... 34 frames elided ...
TypeError: 'KerasTensor' object cannot be interpreted as an integer
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/keras/engine/keras_tensor.py in __array__(self)
243 def __array__(self):
244 raise TypeError(
--> 245 'Cannot convert a symbolic Keras input/output to a numpy array. '
246 'This error may indicate that you\'re trying to pass a symbolic value '
247 'to a NumPy call, which is not supported. Or, '
TypeError: Cannot convert a symbolic Keras input/output to a numpy array. This error may indicate that you're trying to pass a symbolic value to a NumPy call, which is not supported. Or, you may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model.
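From what I've read, this error seems to come from K.rnn inside the layer's call: on TF 2.4+ the functional API hands the layer symbolic KerasTensors, which K.rnn can't iterate. One workaround I've seen suggested is calling tf.compat.v1.disable_eager_execution() before building the model; another is to drop the custom layer and use Keras' built-in Bahdanau-style attention instead. A rough sketch of the latter (my substitution, not the tutorial's code), reusing the tensors defined above:

from tensorflow.keras.layers import AdditiveAttention

# Built-in Bahdanau-style attention: query = decoder sequence, value = encoder sequence.
# Both are 512-dim here (2 x 256 bidirectional encoder, 512-unit decoder),
# as AdditiveAttention requires for query/key.
attn_op = AdditiveAttention()([decoder_outputs, encoder_outputs])  # (batch, de_seq, 512)
decoder_concat_input = Concatenate(axis=-1)([decoder_outputs, attn_op])
final_output = dec_dense(decoder_concat_input)

(Unlike the custom layer, this returns only the context; passing return_attention_scores=True also yields the weights.) Would that be an acceptable replacement, or is there a way to keep the custom layer working?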