How do I add an attention layer to an encoder-decoder seq2seq model?

#keras #deep-learning #nlp #attention-model #seq2seq

Question:

I'm following the tutorial here: https://github.com/Pawandeep-prog/keras-seq2seq-chatbot-with-attention/blob/master/seq2seq-chatbot-keras-with-attention.ipynb but I'm getting some errors.

Here is my code:

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer

class AttentionLayer(Layer):
    """
    This class implements Bahdanau attention (https://arxiv.org/pdf/1409.0473.pdf).
    There are three sets of weights introduced: W_a, U_a, and V_a.
    """

    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        # Create a trainable weight variable for this layer.

        self.W_a = self.add_weight(name='W_a',
                                   shape=tf.TensorShape((input_shape[0][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        self.U_a = self.add_weight(name='U_a',
                                   shape=tf.TensorShape((input_shape[1][2], input_shape[0][2])),
                                   initializer='uniform',
                                   trainable=True)
        self.V_a = self.add_weight(name='V_a',
                                   shape=tf.TensorShape((input_shape[0][2], 1)),
                                   initializer='uniform',
                                   trainable=True)

        super(AttentionLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, inputs, verbose=False):
        """
        inputs: [encoder_output_sequence, decoder_output_sequence]
        """
        assert type(inputs) == list
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        def energy_step(inputs, states):
            """ Step function for computing energy for a single decoder state
            inputs: (batchsize * 1 * de_in_dim)
            states: (batchsize * 1 * de_latent_dim)
            """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            """ Some parameters required for shaping tensors"""
            en_seq_len, en_hidden = encoder_out_seq.shape[1], encoder_out_seq.shape[2]
            de_hidden = inputs.shape[-1]

            """ Computing S.Wa where S=[s0, s1, ..., si]"""
            # <= batch size * en_seq_len * latent_dim
            W_a_dot_s = K.dot(encoder_out_seq, self.W_a)

            """ Computing hj.Ua """
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, latent_dim
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)

            """ tanh(S.Wa   hj.Ua) """
            # <= batch_size*en_seq_len, latent_dim
            Ws_plus_Uh = K.tanh(W_a_dot_s   U_a_dot_h)
            if verbose:
                print('Ws Uh>', Ws_plus_Uh.shape)

            """ softmax(va.tanh(S.Wa   hj.Ua)) """
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """ Step function for computing ci using ei """

            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            # <= batch_size, hidden_size
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        fake_state_c = K.sum(encoder_out_seq, axis=1)
        fake_state_e = K.sum(encoder_out_seq, axis=2)  # <= (batch_size, en_seq_len)

        """ Computing energy outputs """
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        last_out, e_outputs, _ = K.rnn(
            energy_step, decoder_out_seq, [fake_state_e],
        )

        """ Computing context vectors """
        last_out, c_outputs, _ = K.rnn(
            context_step, e_outputs, [fake_state_c],
        )

        return c_outputs, e_outputs

    def compute_output_shape(self, input_shape):
        """ Outputs produced by the layer """
        return [
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[1][2])),
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
        ]
 
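For what it's worth, the layer itself seems to run fine when called on concrete (eager) tensors rather than symbolic `KerasTensor`s. A minimal sanity check, assuming TF 2.x with eager execution (the shapes below are made up):

    # Eager-mode sanity check for AttentionLayer (my own test, not from the tutorial).
    # encoder output: (batch, en_seq_len, units); decoder output: (batch, de_seq_len, units).
    enc = tf.random.normal((2, 10, 512))
    dec = tf.random.normal((2, 7, 512))
    ctx, attn = AttentionLayer()([enc, dec])
    print(ctx.shape)   # (2, 7, 512)  one context vector per decoder step
    print(attn.shape)  # (2, 7, 10)   attention weights over encoder steps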

Code for the encoder-decoder model:

from tensorflow.keras.layers import Input, LSTM, Bidirectional, Concatenate, Dense
from tensorflow.keras.models import Model

# embedding_layer and embedding_layer_out are defined earlier in the notebook
encoder_inputs = Input(shape=(None,))
encoder_embedding = embedding_layer(encoder_inputs)
encoder_LSTM = Bidirectional(LSTM(256,return_sequences = True, return_state=True, dropout=0.05))
encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder_LSTM(encoder_embedding)
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
encoder_states = [ state_h , state_c ]

decoder_inputs = Input(shape=(None ,  ))
decoder_embedding = embedding_layer_out(decoder_inputs)
decoder_LSTM = LSTM(512, return_state= True, return_sequences= True, dropout=0.05)
decoder_outputs , _ , _ = decoder_LSTM( decoder_embedding , initial_state=encoder_states )

# attention
attn_layer = AttentionLayer()
attn_op, attn_state = attn_layer([encoder_outputs, decoder_outputs])
decoder_concat_input = Concatenate(axis=-1)([decoder_outputs, attn_op])

dec_dense = Dense(VOCABULARY_SIZE, activation='softmax')
final_output = dec_dense(decoder_concat_input)

model = Model([encoder_inputs, decoder_inputs], final_output )
model.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy'])

model.summary()

model.fit([encoder_input_data , decoder_input_data], decoder_target_data,validation_split=0.2, batch_size=124, epochs=600) #250,300
model.save( 'model.h5' )
model.save_weights('chatbot_weights.h5')
 
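Since the model is compiled with categorical_crossentropy, decoder_target_data has to be one-hot encoded over VOCABULARY_SIZE (alternatively, sparse_categorical_crossentropy would accept integer targets directly). A minimal sketch, assuming a hypothetical integer array decoder_target_ids of shape (num_samples, max_output_length):

    # Hypothetical preprocessing sketch: one-hot-encode integer target ids so the
    # targets match the categorical_crossentropy loss used above.
    from tensorflow.keras.utils import to_categorical

    decoder_target_data = to_categorical(decoder_target_ids, num_classes=VOCABULARY_SIZE)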

Code for the inference models:

def make_inference_models():
  encoder_model = Model(encoder_inputs, [encoder_outputs] + encoder_states)

  decoder_state_input_h = Input(shape=( 512,))
  decoder_state_input_c = Input(shape=( 512,))

  decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

  decoder_outputs, state_h, state_c = decoder_LSTM(decoder_embedding , initial_state=decoder_states_inputs)
  decoder_states = [state_h, state_c]

  decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

  return encoder_model , decoder_model
 
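For context, this is roughly how the two models are used afterwards; enc_in is a hypothetical (1, src_len) array of padded input token ids:

    # Hypothetical usage sketch: encode one tokenized input and grab the initial
    # decoder states. The encoder model returns [encoder_outputs, state_h, state_c].
    enc_model, dec_model = make_inference_models()
    enc_op, h, c = enc_model.predict(enc_in)
    states_values = [h, c]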

And finally, to generate the response:

    while not stop_condition:
      dec_outputs, h, c = dec_model.predict([empty_target_seq] + states_values)
      
      ##attention
      attn_op, attn_state = attn_layer([enc_op, dec_outputs])
      decoder_concat_input = Concatenate(axis=-1)([dec_outputs, attn_op])
      decoder_concat_input = dec_dense(decoder_concat_input)
      sampled_word_index = np.argmax(decoder_concat_input[0, -1, :] )
      #sampled_word_index = np.argmax( dec_outputs[0, -1, :] )

      sampled_word = None
      for word , index in output_word_dict.items() :
          if sampled_word_index == index :
            decoded_translation += ' {}'.format(word)  # remove word format #decoded_translation.append(word), decoded_translation = []
            sampled_word = word
      
      if sampled_word == '<END>' or len(decoded_translation.split()) > max_output_length:      
        stop_condition = True

      empty_target_seq = np.zeros( ( 1 , 1 ) )  
      empty_target_seq[ 0 , 0 ] = sampled_word_index
      states_values = [ h , c ] 
      
    print("Bot:" + decoded_translation.replace('<END>', ''))  # clean the decoded translation so that it accepts contractions
    print()

  except:
    print("Bot: Sorry, I don't understand!")
    print()
 
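For reference, the loop above is seeded like this before the first iteration (assuming a '<START>' token exists in output_word_dict, as in the tutorial):

    # Hypothetical seeding of the decode loop, based on the variables the loop uses.
    empty_target_seq = np.zeros((1, 1))
    empty_target_seq[0, 0] = output_word_dict['<START>']  # assumed start token
    stop_condition = False
    decoded_translation = ''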

Please help me out, I don't understand the error I'm getting!
ERROR:

 TypeError: 'KerasTensor' object cannot be interpreted as an integer

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-2-da6343914597> in <module>()
    336 attn_layer = AttentionLayer()
    337 #attn_op, attn_state = attn_layer([encoder_outputs, decoder_outputs])
--> 338 attn_op, attn_state = attn_layer([encoder_outputs, decoder_outputs])
    339 decoder_concat_input = Concatenate(axis=-1)([context, decoder_LSTM])
    340

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
    205     try:
--> 206       return target(*args, **kwargs)
    207     except (TypeError, ValueError):

34 frames
TypeError: 'KerasTensor' object cannot be interpreted as an integer

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/keras/engine/keras_tensor.py in __array__(self)
    243   def __array__(self):
    244     raise TypeError(
--> 245         'Cannot convert a symbolic Keras input/output to a numpy array. '
    246         'This error may indicate that you\'re trying to pass a symbolic value '
    247         'to a NumPy call, which is not supported. Or, '

TypeError: Cannot convert a symbolic Keras input/output to a numpy array. This error may indicate that you're trying to pass a symbolic value to a NumPy call, which is not supported. Or, you may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model.