#python #python-3.x #neural-network
Question:
When I try to run my code, I get the following error:
```
line 165, in back_propagation
    dl_wrt_A2 = dl_wrt_z3.dot(self.params['W3'].T)
ValueError: shapes (111,7) and (1,7) not aligned: 7 (dim 1) != 1 (dim 0)
```
I am sure there are errors in my backpropagation, derivatives, and softmax code. The code is below. I am not new to programming, but I have to write the code from scratch without using advanced Python libraries. Any help would be greatly appreciated!
```python
import pandas as pd
import numpy as np
from pandas import DataFrame
from numpy import nan
import matplotlib.pyplot as plt
from numpy.random import randn
from sklearn.metrics import accuracy_score
from scipy.special import softmax

# Set display parameters to prevent line truncation
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 0)

data = pd.read_csv('C:/Users/ronal/OneDrive/Desktop/Intro-2-MachineLearning/Datafiles/breast-cancer-wisconsin.data', header=None)
headerListb = ['Sample Code Number', 'Clump Thickness', 'Uniformity of Cell Size',
               'Uniformity of Cell Shape', 'Marginal Adhesion', 'Single Epithelial Cell Size',
               'Bare Nuclei', 'Bland Chromatin', 'Normal Nucleoli', 'Mitoses', 'Class']
data.to_csv("breast-cancer-wisconsin.data", header=headerListb, index=None)
data2b = pd.read_csv("breast-cancer-wisconsin.data",
                     na_values=("?", " ", "-", "na", "NA", "n/a", "N/A"))
mean_value = data2b[headerListb].mean()
data2b.fillna(value=mean_value, inplace=True)
new_missing_data2b = data2b.isnull().sum()
columns = len(data2b.columns)
rows = len(data2b)
print('Rows:', rows, '\n', 'Columns:', columns)

X = data2b.iloc[:, [1, 2, 3, 4, 5, 6, 7, 8, 9]].values
y = data2b.iloc[:, [10]].values.reshape(X.shape[0], 1)

splitX_horizontally_idx = int(X.shape[0] * 0.8)
trainX = X[:splitX_horizontally_idx, :]  # indexing/selection of the 80%
testX = X[splitX_horizontally_idx:, :]   # indexing/selection of the remaining 20%

splity_horizontally_idx = int(y.shape[0] * 0.8)
trainy = y[:splity_horizontally_idx]  # indexing/selection of the 80%
testy = y[splity_horizontally_idx:]   # indexing/selection of the remaining 20%

# Split test data into 5 k-folds to test the KNN predictor
# with adjusted k values (hyper-parameter)
def kFold(dataset, i, k):
    n = len(dataset)
    return dataset[n*(i-1)//k : n*i//k]

# Assigning test and train variables for folds 1 through 5:
Fold1_Xtest, Fold1_ytest = kFold(testX, 1, 5), kFold(testy, 1, 5)
Fold2_Xtest, Fold2_ytest = kFold(testX, 2, 5), kFold(testy, 2, 5)
Fold3_Xtest, Fold3_ytest = kFold(testX, 3, 5), kFold(testy, 3, 5)
Fold4_Xtest, Fold4_ytest = kFold(testX, 4, 5), kFold(testy, 4, 5)
Fold5_Xtest, Fold5_ytest = kFold(testX, 5, 5), kFold(testy, 5, 5)
Fold1_Xtrain, Fold1_ytrain = kFold(trainX, 1, 5), kFold(trainy, 1, 5)
Fold2_Xtrain, Fold2_ytrain = kFold(trainX, 2, 5), kFold(trainy, 2, 5)
Fold3_Xtrain, Fold3_ytrain = kFold(trainX, 3, 5), kFold(trainy, 3, 5)
Fold4_Xtrain, Fold4_ytrain = kFold(trainX, 4, 5), kFold(trainy, 4, 5)
Fold5_Xtrain, Fold5_ytrain = kFold(trainX, 5, 5), kFold(trainy, 5, 5)


class NeuralNet():
    '''
    A three layer neural network
    '''
    def __init__(self, layers=[9, 6, 6, 1], learning_rate=0.001, iterations=100):
        self.params = {}
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.loss = []
        self.sample_size = None
        self.layers = layers
        self.X = None
        self.y = None

    def init_weights(self):
        '''
        Initialize the weights from a random normal distribution
        '''
        np.random.seed(1)  # Seed the random number generator
        self.params["W1"] = np.random.randn(self.layers[0], self.layers[1])
        self.params['b1'] = np.random.randn(self.layers[1],)
        self.params['W2'] = np.random.randn(self.layers[1], self.layers[2])
        self.params['b2'] = np.random.randn(self.layers[2],)
        self.params['W3'] = np.random.randn(self.layers[2], self.layers[3])
        self.params['b3'] = np.random.randn(self.layers[3],)

    def eta(self, x):
        ETA = 0.0000000001
        return np.maximum(x, ETA)

    def sigmoid(self, Z):
        '''
        The sigmoid function takes in real numbers in any range and
        squashes it to a real-valued output between 0 and 1.
        '''
        return 1 / (1 + np.exp(-Z))

    def entropy_loss(self, y, yhat):
        nsample = len(y)
        yhat_inv = 1.0 - yhat
        y_inv = 1.0 - y
        yhat = self.eta(yhat)  # clips value to avoid NaNs in log
        yhat_inv = self.eta(yhat_inv)
        loss = -1/nsample * (np.sum(np.multiply(np.log(yhat), y)
                                    + np.multiply((y_inv), np.log(yhat_inv))))
        return loss

    def forward_propagation(self):
        '''
        Performs the forward propagation
        '''
        Z1 = self.X.dot(self.params['W1']) + self.params['b1']
        A1 = self.sigmoid(Z1)
        Z2 = A1.dot(self.params['W2']) + self.params['b2']
        A2 = self.sigmoid(Z2)
        Z3 = A2.dot(self.params['W3']) + self.params['b3']
        yhat = softmax(Z3)
        loss = self.entropy_loss(self.y, yhat)

        # save calculated parameters
        self.params['Z1'] = Z1
        self.params['Z2'] = Z2
        self.params['Z3'] = Z3
        self.params['A1'] = A1
        self.params['A2'] = A2

        return yhat, loss

    def back_propagation(self, yhat):
        '''
        Computes the derivatives and updates the weights and biases accordingly.
        '''
        y_inv = 1 - self.y
        yhat_inv = 1 - yhat

        sig1 = self.sigmoid(self.params['Z1'])
        dsig1 = sig1 * (1 - sig1)
        sig2 = self.sigmoid(self.params['Z2'])
        dsig2 = sig2 * (1 - sig2)

        dl_wrt_yhat = np.divide(y_inv, self.eta(yhat_inv)) - np.divide(self.y, self.eta(yhat))
        dl_wrt_softmax = yhat * (yhat_inv)
        dl_wrt_z3 = dl_wrt_softmax * dl_wrt_yhat

        dl_wrt_A2 = dl_wrt_z3.dot(self.params['W3'].T)
        dl_wrt_w3 = self.params['A2'].T.dot(dl_wrt_z3)
        dl_wrt_b3 = np.sum(dl_wrt_z3, axis=0, keepdims=True)

        dl_wrt_z2 = dl_wrt_A2 * dsig2
        dl_wrt_A1 = dl_wrt_z2.dot(self.params['W2'].T)
        dl_wrt_w2 = self.params['A1'].T.dot(dl_wrt_z2)
        dl_wrt_b2 = np.sum(dl_wrt_z2, axis=0, keepdims=True)

        dl_wrt_z1 = dl_wrt_A1 * dsig1
        dl_wrt_w1 = self.X.T.dot(dl_wrt_z1)
        dl_wrt_b1 = np.sum(dl_wrt_z1, axis=0, keepdims=True)

        # update the weights and bias
        self.params['W1'] = self.params['W1'] - self.learning_rate * dl_wrt_w1
        self.params['W2'] = self.params['W2'] - self.learning_rate * dl_wrt_w2
        self.params['W3'] = self.params['W3'] - self.learning_rate * dl_wrt_w3
        self.params['b1'] = self.params['b1'] - self.learning_rate * dl_wrt_b3
        self.params['b2'] = self.params['b2'] - self.learning_rate * dl_wrt_b2
        self.params['b3'] = self.params['b3'] - self.learning_rate * dl_wrt_b1

    def fit(self, X, y):
        '''
        Trains the neural network using the specified data and labels
        '''
        self.X = X
        self.y = y
        self.init_weights()  # initialize weights and bias
        for i in range(self.iterations):
            yhat, loss = self.forward_propagation()
            self.back_propagation(yhat)
            self.loss.append(loss)

    def predict(self, X):
        '''
        Predicts on a test data
        '''
        Z1 = X.dot(self.params['W1']) + self.params['b1']
        A1 = self.sigmoid(Z1)
        Z2 = A1.dot(self.params['W2']) + self.params['b2']
        Z3 = Z2.dot(self.params['W3']) + self.params['b3']
        pred = self.sigmoid(Z3)
        return np.round(pred)

    def acc(self, y, yhat):
        '''
        Calculates the accuracy between the predicted values and the truth labels
        '''
        acc = int(sum(y == yhat) / len(y) * 100)
        return acc

    def plot_loss(self):
        '''
        Plots the loss curve
        '''
        plt.plot(self.loss)
        plt.xlabel("Iteration")
        plt.ylabel("logloss")
        plt.title("Loss curve for training")
        plt.show()


model1 = NeuralNet(layers=[9, 7, 7, 1], learning_rate=0.001, iterations=1000)
model1.fit(Fold1_Xtrain, Fold1_ytrain)
pred1 = model1.predict(Fold1_Xtest)
Acc1 = accuracy_score(pred1, Fold1_ytest)
print(Acc1)
```
Comments:
1. Remember that with matrix multiplication, (n,m) × (m,p) = (n,p). In other words, if you are trying to multiply (111,7) by (1,7), you need to transpose the (1,7) matrix.
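To illustrate the shape rule from this comment, here is a minimal sketch using hypothetical arrays with the shapes from the traceback; the names `grad` and `W` are placeholders, not variables from the original code:
```python
import numpy as np

# Hypothetical arrays with the shapes from the error message
grad = np.random.randn(111, 7)  # plays the role of dl_wrt_z3
W = np.random.randn(1, 7)       # plays the role of self.params['W3'].T

# grad.dot(W) raises ValueError: the inner dimensions (7 and 1) differ,
# exactly like "shapes (111,7) and (1,7) not aligned" in the traceback.

# Transposing the (1,7) operand makes the inner dimensions match:
out = grad.dot(W.T)  # (111,7) . (7,1) -> (111,1)
print(out.shape)     # (111, 1)
```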