TensorFlow, RNN > большая разница между accuracy и val_accuracy (или между loss и val_loss)

#python #tensorflow #machine-learning #keras #deep-learning

Вопрос:

Я изучаю TensorFlow и никак не могу понять, почему результат на проверочном наборе так сильно отличается от результата на обучающем наборе. Если коротко, странно то, что точность (accuracy) на обучении равна 1,0 в каждой эпохе. Для справки: я использую набор данных «bbc-text.csv» с Kaggle.

Мой код выглядит следующим образом

import csv

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Settings consumed further down by the tokenizer, the padding step and the model.
vocab_size = 1000        # passed to Tokenizer(num_words=...) and to the Embedding layer
embedding_dim = 16       # output size of the Embedding layer
max_length = 120         # length every padded sequence is brought to
trunc_type = "post"
padding_type = "post"
oov_tok = "<OOV>"        # placeholder token for out-of-vocabulary words
training_portion = 0.8   # fraction of the rows used for training

# Filled while reading the CSV file: article texts and their category labels.
sentences = []
labels = []

# Words stripped from every article before tokenization.
stopwords = [
    "a", "about", "above", "after", "again", "against", "all", "am", "an",
    "and", "any", "are", "as", "at", "be", "because", "been", "before",
    "being", "below", "between", "both", "but", "by", "could", "did", "do",
    "does", "doing", "down", "during", "each", "few", "for", "from",
    "further", "had", "has", "have", "having", "he", "he'd", "he'll", "he's",
    "her", "here", "here's", "hers", "herself", "him", "himself", "his",
    "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into",
    "is", "it", "it's", "its", "itself", "let's", "me", "more", "most", "my",
    "myself", "nor", "of", "on", "once", "only", "or", "other", "ought",
    "our", "ours", "ourselves", "out", "over", "own", "same", "she", "she'd",
    "she'll", "she's", "should", "so", "some", "such", "than", "that",
    "that's", "the", "their", "theirs", "them", "themselves", "then",
    "there", "there's", "these", "they", "they'd", "they'll", "they're",
    "they've", "this", "those", "through", "to", "too", "under", "until",
    "up", "very", "was", "we", "we'd", "we'll", "we're", "we've", "were",
    "what", "what's", "when", "when's", "where", "where's", "which", "while",
    "who", "who's", "whom", "why", "why's", "with", "would", "you", "you'd",
    "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves",
]
print(len(stopwords))

# Read the dataset: column 0 of each row goes to `labels`, column 1 (with
# stopwords removed) goes to `sentences`.
# The path is a raw string so its backslashes are not parsed as escape
# sequences ("\U" would otherwise be an invalid unicode escape), and the file
# is opened with newline="" as the csv module expects for its input files.
with open(r"C:\Users\uidv6361\machine_learning_basic\bbc-text.csv", newline="") as csvfile:
    reader = csv.reader(csvfile, delimiter=",")
    next(reader)  # discard the first row (presumably the CSV header)
    for row in reader:
        labels.append(row[0])
        sentence = row[1]
        # Remove each stopword where it occurs as a space-delimited token.
        for word in stopwords:
            token = " " + word + " "
            sentence = sentence.replace(token, " ")
        sentences.append(sentence)

# Ordered split: the first `training_portion` of the rows is used for training,
# everything after that index for validation.
train_size = int(len(sentences) * training_portion)

train_sentences, validation_sentences = (
    sentences[:train_size],
    sentences[train_size:],
)
train_labels, validation_labels = (
    labels[:train_size],
    labels[train_size:],
)

# Fit the word tokenizer on the training sentences only, so the validation
# split does not contribute to the vocabulary.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(train_sentences)
word_index = tokenizer.word_index

# Encode both splits and bring every sequence to `max_length`.
# `trunc_type` is passed explicitly: it was configured but never used, so
# pad_sequences silently fell back to its default truncating="pre".
train_sequences = tokenizer.texts_to_sequences(train_sentences)
train_padded = pad_sequences(
    train_sequences,
    padding=padding_type,
    truncating=trunc_type,
    maxlen=max_length,
)

validation_sequences = tokenizer.texts_to_sequences(validation_sentences)
validation_padded = pad_sequences(
    validation_sequences,
    padding=padding_type,
    truncating=trunc_type,
    maxlen=max_length,
)

# Labels get their own tokenizer, fitted on all labels so that both splits
# share one label-to-integer mapping.
label_tokenizer = Tokenizer()
label_tokenizer.fit_on_texts(labels)

# NOTE(review): assumes every label is a single token, so each encoded label
# is a one-element sequence — confirm against the dataset.
training_label_seq = np.array(label_tokenizer.texts_to_sequences(train_labels))
validation_label_seq = np.array(label_tokenizer.texts_to_sequences(validation_labels))

# Classifier: embedding -> average over the sequence -> dense hidden layer
# -> 6-way softmax output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dense(24, activation='relu'))
model.add(tf.keras.layers.Dense(6, activation='softmax'))

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

# Train and keep the returned history object for plotting.
num_epochs = 30
bong = model.fit(
    train_padded,
    training_label_seq,
    epochs=num_epochs,
    validation_data=(validation_padded, validation_label_seq),
    verbose=2,
)

import matplotlib.pyplot as plt

def plot_graphs(history, string):
    """Plot a training metric together with its validation counterpart.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch values (here, the value returned by ``model.fit``).
        string: Name of the metric to plot, e.g. ``"accuracy"``. The
            validation series is read from the key ``"val_" + string``.

    Raises:
        KeyError: If either key is missing from ``history.history``.
    """
    # The original concatenations had lost their "+" operators.
    val_key = "val_" + string
    plt.plot(history.history[string])
    plt.plot(history.history[val_key])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, val_key])
    plt.show()
    
# Show the accuracy curves first, then the loss curves.
for metric_name in ("accuracy", "loss"):
    plot_graphs(bong, metric_name)

Почему история обучения показывает точность только 1,0 и при этом такую большую разницу с проверочным набором?

Мне интересно, не ошибся ли я в своем коде.

Вопрос:

Вам также может понравиться

Инициализация «многомерного» объекта в javascript

JupyterLab: 500 (Internal Server Error) — не могу установить плагины

mysql_fetch_array иногда дает сбой