Attempting to code a 3-layer neural network using sigmoid activation and softmax as the output-layer activation: getting a ValueError at line 165

#python #python-3.x #neural-network

Question:

When I try to run my code, I get the following error:

line 165, in back_propagation
    dl_wrt_A2 = dl_wrt_z3.dot(self.params['W3'].T)
ValueError: shapes (111,7) and (1,7) not aligned: 7 (dim 1) != 1 (dim 0)

I am sure there are mistakes in my backpropagation, my derivatives, and my softmax coding. The code is below. I am not new to coding, but I have to write this from scratch without using advanced Python libraries. Any help would be greatly appreciated!

import pandas as pd
import numpy as np
from pandas import DataFrame
from numpy import nan
import matplotlib.pyplot as plt
from numpy.random import randn
from sklearn.metrics import accuracy_score
from scipy.special import softmax

# Set display parameters to prevent line truncation
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 0)

data = pd.read_csv('C:/Users/ronal/OneDrive/Desktop/Intro-2-MachineLearning/Datafiles/breast-cancer-wisconsin.data', header=None)
headerListb = ['Sample Code Number', 'Clump Thickness', 'Uniformity of Cell Size', 'Uniformity of Cell Shape',
               'Marginal Adhesion', 'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin',
               'Normal Nucleoli', 'Mitoses', 'Class']
data.to_csv("breast-cancer-wisconsin.data", header=headerListb, index=None)
data2b = pd.read_csv("breast-cancer-wisconsin.data", na_values=("?", " ", "-", "na", "NA", "n/a", "N/A"))

mean_value = data2b[headerListb].mean()
data2b.fillna(value=mean_value, inplace=True)

new_missing_data2b = data2b.isnull().sum()
columns = len(data2b.columns)
rows = len(data2b)

print('Rows:', rows, '\n', 'Columns:', columns)

X = data2b.iloc[:, [1, 2, 3, 4, 5, 6, 7, 8, 9]].values
y = data2b.iloc[:, [10]].values.reshape(X.shape[0], 1)

splitX_horizontally_idx = int(X.shape[0] * 0.8)
trainX = X[:splitX_horizontally_idx, :]  # indexing/selection of the 80%
testX = X[splitX_horizontally_idx:, :]   # indexing/selection of the remaining 20%

splity_horizontally_idx = int(y.shape[0] * 0.8)
trainy = y[:splity_horizontally_idx]  # indexing/selection of the 80%
testy = y[splity_horizontally_idx:]   # indexing/selection of the remaining 20%

# Split test data into 5 k-folds to test the predictor with adjusted k values (hyper-parameter)
def kFold(dataset, i, k):
    n = len(dataset)
    return dataset[n*(i-1)//k:n*i//k]

# Assigning test and train variables for Folds 1 thru 5:
Fold1_Xtest, Fold1_ytest = kFold(testX, 1, 5), kFold(testy, 1, 5)
Fold2_Xtest, Fold2_ytest = kFold(testX, 2, 5), kFold(testy, 2, 5)
Fold3_Xtest, Fold3_ytest = kFold(testX, 3, 5), kFold(testy, 3, 5)
Fold4_Xtest, Fold4_ytest = kFold(testX, 4, 5), kFold(testy, 4, 5)
Fold5_Xtest, Fold5_ytest = kFold(testX, 5, 5), kFold(testy, 5, 5)

Fold1_Xtrain, Fold1_ytrain = kFold(trainX, 1, 5), kFold(trainy, 1, 5)
Fold2_Xtrain, Fold2_ytrain = kFold(trainX, 2, 5), kFold(trainy, 2, 5)
Fold3_Xtrain, Fold3_ytrain = kFold(trainX, 3, 5), kFold(trainy, 3, 5)
Fold4_Xtrain, Fold4_ytrain = kFold(trainX, 4, 5), kFold(trainy, 4, 5)
Fold5_Xtrain, Fold5_ytrain = kFold(trainX, 5, 5), kFold(trainy, 5, 5)


class NeuralNet():
    '''
    A three layer neural network
    '''

    def __init__(self, layers=[9, 6, 6, 1], learning_rate=0.001, iterations=100):
        self.params = {}
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.loss = []
        self.sample_size = None
        self.layers = layers
        self.X = None
        self.y = None

    def init_weights(self):
        '''
        Initialize the weights from a random normal distribution
        '''
        np.random.seed(1)  # Seed the random number generator
        self.params["W1"] = np.random.randn(self.layers[0], self.layers[1])
        self.params['b1'] = np.random.randn(self.layers[1],)
        self.params['W2'] = np.random.randn(self.layers[1], self.layers[2])
        self.params['b2'] = np.random.randn(self.layers[2],)
        self.params['W3'] = np.random.randn(self.layers[2], self.layers[3])
        self.params['b3'] = np.random.randn(self.layers[3],)

    def eta(self, x):
        ETA = 0.0000000001
        return np.maximum(x, ETA)

    def sigmoid(self, Z):
        '''
        The sigmoid function takes in real numbers in any range and
        squashes it to a real-valued output between 0 and 1.
        '''
        return 1/(1 + np.exp(-Z))

    def entropy_loss(self, y, yhat):
        nsample = len(y)
        yhat_inv = 1.0 - yhat
        y_inv = 1.0 - y
        yhat = self.eta(yhat)  ## clips value to avoid NaNs in log
        yhat_inv = self.eta(yhat_inv)
        loss = -1/nsample * (np.sum(np.multiply(np.log(yhat), y) + np.multiply((y_inv), np.log(yhat_inv))))
        return loss

    def forward_propagation(self):
        '''
        Performs the forward propagation
        '''
        Z1 = self.X.dot(self.params['W1']) + self.params['b1']
        A1 = self.sigmoid(Z1)
        Z2 = A1.dot(self.params['W2']) + self.params['b2']
        A2 = self.sigmoid(Z2)
        Z3 = A2.dot(self.params['W3']) + self.params['b3']
        yhat = softmax(Z3)
        loss = self.entropy_loss(self.y, yhat)

        # save calculated parameters
        self.params['Z1'] = Z1
        self.params['Z2'] = Z2
        self.params['Z3'] = Z3
        self.params['A1'] = A1
        self.params['A2'] = A2

        return yhat, loss

    def back_propagation(self, yhat):
        '''
        Computes the derivatives and updates the weights and biases accordingly.
        '''
        y_inv = 1 - self.y
        yhat_inv = 1 - yhat

        sig1 = self.sigmoid(self.params['Z1'])
        dsig1 = sig1*(1-sig1)

        sig2 = self.sigmoid(self.params['Z2'])
        dsig2 = sig2*(1-sig2)

        dl_wrt_yhat = np.divide(y_inv, self.eta(yhat_inv)) - np.divide(self.y, self.eta(yhat))
        dl_wrt_softmax = yhat * (yhat_inv)
        dl_wrt_z3 = dl_wrt_softmax * dl_wrt_yhat

        dl_wrt_A2 = dl_wrt_z3.dot(self.params['W3'].T)
        dl_wrt_w3 = self.params['A2'].T.dot(dl_wrt_z3)
        dl_wrt_b3 = np.sum(dl_wrt_z3, axis=0, keepdims=True)

        dl_wrt_z2 = dl_wrt_A2 * dsig2
        dl_wrt_A1 = dl_wrt_z2.dot(self.params['W2'].T)
        dl_wrt_w2 = self.params['A1'].T.dot(dl_wrt_z2)
        dl_wrt_b2 = np.sum(dl_wrt_z2, axis=0, keepdims=True)

        dl_wrt_z1 = dl_wrt_A1*dsig1
        dl_wrt_w1 = self.X.T.dot(dl_wrt_z1)
        dl_wrt_b1 = np.sum(dl_wrt_z1, axis=0, keepdims=True)

        # update the weights and bias
        self.params['W1'] = self.params['W1'] - self.learning_rate * dl_wrt_w1
        self.params['W2'] = self.params['W2'] - self.learning_rate * dl_wrt_w2
        self.params['W3'] = self.params['W3'] - self.learning_rate * dl_wrt_w3
        self.params['b1'] = self.params['b1'] - self.learning_rate * dl_wrt_b3
        self.params['b2'] = self.params['b2'] - self.learning_rate * dl_wrt_b2
        self.params['b3'] = self.params['b3'] - self.learning_rate * dl_wrt_b1

    def fit(self, X, y):
        '''
        Trains the neural network using the specified data and labels
        '''
        self.X = X
        self.y = y
        self.init_weights()  # initialize weights and bias

        for i in range(self.iterations):
            yhat, loss = self.forward_propagation()
            self.back_propagation(yhat)
            self.loss.append(loss)

    def predict(self, X):
        '''
        Predicts on a test data
        '''
        Z1 = X.dot(self.params['W1']) + self.params['b1']
        A1 = self.sigmoid(Z1)
        Z2 = A1.dot(self.params['W2']) + self.params['b2']
        Z3 = Z2.dot(self.params['W3']) + self.params['b3']
        pred = self.sigmoid(Z3)
        return np.round(pred)

    def acc(self, y, yhat):
        '''
        Calculates the accuracy between the predicted values and the truth labels
        '''
        acc = int(sum(y == yhat) / len(y) * 100)
        return acc

    def plot_loss(self):
        '''
        Plots the loss curve
        '''
        plt.plot(self.loss)
        plt.xlabel("Iteration")
        plt.ylabel("logloss")
        plt.title("Loss curve for training")
        plt.show()


model1 = NeuralNet(layers=[9, 7, 7, 1], learning_rate=0.001, iterations=1000)
model1.fit(Fold1_Xtrain, Fold1_ytrain)
pred1 = model1.predict(Fold1_Xtest)
Acc1 = accuracy_score(pred1, Fold1_ytest)
print(Acc1)
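I am also not sure I am calling scipy.special.softmax correctly. A quick check of its axis behaviour (the small array here is only for illustration; it is not from my dataset):

import numpy as np
from scipy.special import softmax

Z = np.array([[1.0, 2.0, 3.0],
              [0.5, 0.5, 0.5]])

# Default axis=None: a single softmax over all six values, so the whole array sums to 1.
print(softmax(Z).sum())                  # 1.0

# axis=1: a separate softmax per row, so each row sums to 1.
print(softmax(Z, axis=1).sum(axis=1))    # [1. 1.]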

Comments:

1. Remember that when multiplying matrices, (n,m) x (m,p) = (n,p). What I mean is that if you are trying to multiply (111,7) by (1,7), you should transpose the (1,7) matrix, as in the sketch below.
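A minimal NumPy sketch of that shape rule (the array names only mirror the shapes from the traceback; they are not taken from the question's code):

import numpy as np

A = np.random.randn(111, 7)   # same shape as dl_wrt_z3 in the traceback
B = np.random.randn(1, 7)     # same shape as self.params['W3'].T in the traceback

# A.dot(B) raises the same ValueError: the inner dimensions (7 and 1) do not match.
# A.dot(B.T) multiplies (111, 7) by (7, 1) and gives a (111, 1) result.
C = A.dot(B.T)
print(C.shape)   # (111, 1)

That said, with layers=[9,7,7,1] the shape of W3 is (7,1), so W3.T being (1,7) is expected; what looks suspicious is dl_wrt_z3 being (111,7) rather than (111,1). One likely source is the last block of back_propagation, where b1 is updated with dl_wrt_b3 and b3 with dl_wrt_b1: after the first iteration the (1,7)-shaped dl_wrt_b1 broadcasts b3 to (1,7), and on the next forward pass Z3 becomes (111,7) instead of (111,1).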