#python #tensorflow #machine-learning #keras #classification
Question:
I am following along with Laurence Moroney's TensorFlow tutorial, coding as I go. Here is the video: https://www.youtube.com/watch?v=Y_hzMnRXjhI
I am getting this error message:
Traceback (most recent call last):
  File "tensorTest.py", line 66, in <module>
    validation_data=(testing_padded, testing_labels), verbose=2)
  File "/Users/elliot/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 108, in _method_wrapper
    return method(self, *args, **kwargs)
  File "/Users/elliot/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 1063, in fit
    steps_per_execution=self._steps_per_execution)
  File "/Users/elliot/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 1117, in __init__
    model=model)
  File "/Users/elliot/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 282, in __init__
    raise ValueError(msg)
ValueError: Data cardinality is ambiguous:
  x sizes: 28618
  y sizes: 14309
Please provide data which shares the same first dimension.
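[Editor's note: Keras raises this ValueError whenever the inputs and labels passed to fit() disagree on the first (sample) dimension. A minimal sketch that reproduces the same message, with array shapes chosen purely for illustration:

import numpy as np
import tensorflow as tf

# x has 28618 rows but y has only 14309 labels, so Keras cannot
# tell how many samples the dataset actually contains.
x = np.zeros((28618, 100))
y = np.zeros((14309,))

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(100,))])
model.compile(loss='mse', optimizer='adam')
model.fit(x, y)  # ValueError: Data cardinality is ambiguous
]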
I can get the model summary just fine, but when it tries to fit I get the error. Full code below:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
data = pd.read_json("Sarcasm_Headlines_Dataset_v2.json", lines=True)
sentences = data['headline'].to_list()
labels = data['is_sarcastic'].to_list()
urls = data['article_link'].to_list()
vocab_size = 10000
embedding_dim = 16
max_length = 100
trunc_type = 'post'
padding_type = 'post'
oov_tok = "<OOV>"
training_size = 14309
training_sentences = sentences[0:training_size]
testing_sentences = sentences[:training_size]
training_labels = labels[0:training_size]
testing_labels = labels[training_size:]
tokenizer = Tokenizer(oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
training_sequences = tokenizer.texts_to_sequences(sentences)
training_padded = pad_sequences(training_sequences, padding=padding_type, maxlen=max_length, truncating=trunc_type)
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, padding=padding_type, maxlen=max_length, truncating=trunc_type)
import numpy as np
training_padded = np.array(training_padded)
training_labels = np.array(training_labels)
testing_padded = np.array(testing_padded)
testing_labels = np.array(testing_labels)
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
num_epochs = 30
history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=2)
print(history)
Thanks in advance!
Comments:
1. It's probably testing_sentences = sentences[training_size:], not testing_sentences = sentences[:training_size]
2. Tried that, but I still get the same error. Thanks anyway.
3. @GreenCauliflower Is your problem solved now? Also, can you share the output of training_padded.shape and training_labels.shape?
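[Editor's note: piecing the thread together, a likely root cause, inferred from the posted code and not confirmed by the asker: training_sequences is built from the full sentences list rather than training_sentences, and 28618 is exactly twice 14309, so training_padded ends up with twice as many rows as training_labels. The testing slice is also wrong, as comment 1 points out. A sketch of the corrected lines:

# Take the tail of the list for testing, not the head again:
testing_sentences = sentences[training_size:]

# Tokenize only the training split, not the full dataset:
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, padding=padding_type,
                                maxlen=max_length, truncating=trunc_type)

With both lines fixed, training_padded and training_labels should share the first dimension (14309), which is what fit() requires.]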