Пытаюсь создать классификатор для болезней, но он возвращает неверный вывод

#python #machine-learning #classification

#python #машинное обучение #классификация

Вопрос:

Я пытаюсь создать приложение, которое может получать симптомы пациентов в качестве входных данных и выводить три наиболее вероятных заболевания.

 x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(x, y, test_size=0.4)  # hold out 40% of the samples for testing

# Functional-API network: 9 input features -> two 12-unit hidden layers -> 32 outputs.
inputs = keras.Input(shape=(9,))
hidden_1 = keras.layers.Dense(12, activation='selu')(inputs)
hidden_2 = keras.layers.Dense(12, activation='relu')(hidden_1)
# NOTE(review): sigmoid scores each of the 32 outputs independently, so a
# prediction row is not constrained to sum to 1; softmax is the activation
# normally paired with categorical_crossentropy.
outputs = keras.layers.Dense(32, activation='sigmoid')(hidden_2)

model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

model.summary()

model.fit(x_train, y_train, epochs=200)  # starts training

# Print the raw predictions for the test split, then the evaluation result
# (the loss followed by the compiled metric).
prediction = model.predict(x_test)
print(prediction)
scores = model.evaluate(x_test, y_test, verbose=0)
print(scores)

# Compare the first predicted row with its one-hot target.
print(prediction[0])
print(y_test[0])

# Persist the trained model so another script can load it.
model.save("modeldisease.h5")
  

Но когда я сохраняю эту модель и загружаю её в другом файле Python, список выходных данных выглядит следующим образом:

   [[0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
  1.000000e+00 0.000000e+00 0.000000e+00 1.842211e-21 0.000000e+00
  0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00
  1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 0.000000e+00
  0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00
  0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
  1.000000e+00 0.000000e+00]]
100.0 100.0 100.0
HEPATITE A HEPATITE_ALCOOLICA REFLUXO_GASTROESOFAGICO

Process finished with exit code 0
  

Что я делаю не так? Разве модель не должна просто вернуть список прогнозов, все значения которого в сумме дают 1?
Ссылка на набор данных: https://drive.google.com/file/d/1LFCtuBKodGkm1NNdv3QdXOwzlSqdip5q/view?usp=sharing

Полный код:

 from tensorflow import keras
import numpy as np
import sklearn
from sklearn import preprocessing
import pandas as pd

# Training script: encodes the dataset, trains a small dense network and
# saves it to "modeldisease.h5".

# Load the semicolon-separated dataset.
data = pd.read_csv("DATA.csv", sep=";")

# Collect the names of all text (object-dtype) columns so they can be
# replaced by integer codes below.
obj_data = data.select_dtypes(include=["object"]).copy()
obj_data_names = []

for col in obj_data.columns:
    obj_data_names.append(col)

# Replace every text column with its integer category code, going through a
# temporary "<name>_cat" column that is dropped again afterwards.
for col_name in obj_data_names:
    data[col_name] = data[col_name].astype('category')
    data[col_name + "_cat"] = data[col_name].cat.codes
    data[col_name] = data[col_name + "_cat"]
    # axis is passed by keyword: the positional form was removed in pandas 2.0.
    data.drop(col_name + "_cat", axis=1, inplace=True)
print(data)
data.fillna(0, inplace=True)

# Features are every column except the target column "Doenca".
x = np.array(data.drop(["Doenca"], axis=1))
y = np.array(data["Doenca"])

# Rescale the features and one-hot encode the integer class codes.
# NOTE(review): any input passed to the saved model later presumably needs
# the same normalize() step — confirm against the loading script.
x = preprocessing.normalize(x)
y = keras.utils.to_categorical(y)

# Hold out 40% of the samples for testing.
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(x, y, test_size=0.4)

# Functional-API network: 9 input features -> two 12-unit hidden layers -> 32 outputs.
inputs = keras.Input(shape=(9,))
hidden_1 = keras.layers.Dense(12, activation='selu')(inputs)
hidden_2 = keras.layers.Dense(12, activation='relu')(hidden_1)
# NOTE(review): sigmoid scores each of the 32 outputs independently, so a
# prediction row is not constrained to sum to 1; softmax is the activation
# normally paired with categorical_crossentropy. Left unchanged here because
# this is the behaviour the question is about.
outputs = keras.layers.Dense(32, activation='sigmoid')(hidden_2)

model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

model.summary()

model.fit(x_train, y_train, epochs=200)  # starts training

# Print the raw predictions for the test split, then the evaluation result
# (the loss followed by the compiled metric).
prediction = model.predict(x_test)
print(prediction)
scores = model.evaluate(x_test, y_test, verbose=0)
print(scores)

# Compare the first predicted row with its one-hot target.
print(prediction[0])
print(y_test[0])

# Persist the trained model so another script can load it.
model.save("modeldisease.h5")

# For every test sample, print the index of the expected class and of the
# highest-scoring predicted class, followed by their names.
for i in range(len(y_test)):
    max_expected = np.amax(y_test[i])
    #print(max_expected)
    y_expected = np.where(y_test[i] == max_expected)
    print(y_expected[0])
    max_predicted = np.amax(prediction[i])
    #print(max_predicted)
    # NOTE(review): if several outputs tie for the maximum, np.where returns
    # more than one index and the int() conversion below will fail.
    y_predicted = np.where(prediction[i] == max_predicted)
    print(y_predicted[0])

    # NOTE(review): this list has 33 entries ("HIPERTENSAO" appears twice and
    # '"HEPATITE D' carries a stray quote) while the network has 32 outputs —
    # verify the names against the category codes of the "Doenca" column.
    classes = ["ACNE", "AIDS", "ALERGIA", "ARTRITE", "ARTROSE", "CATAPORA", "COLESTASE", "DENGUE", "DIABETES", "ENXAQUECA", "ESPONDILOSE",
               "FEBRE_TIFOIDE", "GASTROENTERITE", "GRIPE", "HEPATITE_ALCOOLICA", "HEPATITE A", "HEPATITE B", "HEPATITE C", '"HEPATITE D',
               "HEPATITE E", "HIPERTENSAO", "HIPERTENSAO", "HIPERTIROIDISMO", "HIPOGLICEMIA", "HIPOTIREODISMO", "IMPETIGO", "INFECCA_URINA",
               "MALARIA", "PNEUMONIA", "PSORIASE", "REFLUXO_GASTROESOFAGICO", 'TUBERCULOSE', "ULCERA GASTRICA"]
    print(f"Valor esperado: {classes[int(y_expected[0])]}, Valor previsto: {classes[int(y_predicted[0])]}")

# print(prediction[0][0])
# print(round(prediction[0][0]))
# print(classes[round(int(prediction[0][0]))])
#print(y_test)
#print(prediction)
  

Код загрузки модели:

 from tensorflow import keras

# Load the model file written by the training script.
model = keras.models.load_model("modeldisease.h5")

# NOTE(review): the training script passes its features through
# preprocessing.normalize before fitting, but this raw feature vector is fed
# in unscaled — confirm whether the same scaling should be applied here.
result = model.predict([(12, 40, 39, 17, 0, 0, 0, 0, 0)])
print(result)

# Indices of the three largest outputs, highest first, and their scores.
sortedshit = result[0].argsort()[-3:][::-1]
p1 = result[0][sortedshit[0]]
p2 = result[0][sortedshit[1]]
p3 = result[0][sortedshit[2]]

# Class names looked up by output index.
# NOTE(review): this list has 33 entries ("HIPERTENSAO" appears twice and
# '"HEPATITE D' carries a stray quote) while the training script builds a
# 32-output layer — verify the names against the encoded target column.
classes = ["ACNE", "AIDS", "ALERGIA", "ARTRITE", "ARTROSE", "CATAPORA", "COLESTASE", "DENGUE", "DIABETES", "ENXAQUECA",
           "ESPONDILOSE",
           "FEBRE_TIFOIDE", "GASTROENTERITE", "GRIPE", "HEPATITE_ALCOOLICA", "HEPATITE A", "HEPATITE B", "HEPATITE C",
           '"HEPATITE D',
           "HEPATITE E", "HIPERTENSAO", "HIPERTENSAO", "HIPERTIROIDISMO", "HIPOGLICEMIA", "HIPOTIREODISMO", "IMPETIGO",
           "INFECCA_URINA",
           "MALARIA", "PNEUMONIA", "PSORIASE", "REFLUXO_GASTROESOFAGICO", 'TUBERCULOSE', "ULCERA GASTRICA"]

# Print the three top scores as percentages.
print(p1*100, p2*100, p3*100)

# Print the class names for those three outputs.
print(classes[sortedshit[0]], classes[sortedshit[1]], classes[sortedshit[2]])
  

Ответ №1:

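Примените в выходном слое функцию активации softmax вместо sigmoid. Sigmoid вычисляет значение для каждого класса независимо, поэтому выходы не обязаны давать в сумме 1; softmax нормирует их так, что сумма равна 1: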

 # Dense is defined in keras.layers (there is no keras.Dense); softmax
 # normalizes the 32 outputs so that each prediction row sums to 1.
 outputs = keras.layers.Dense(32, activation='softmax')(hidden_2)