Tensorflow: ValueError: Input 0 is incompatible with layer model: expected shape=(None, 99), found shape=(None, 3)

#python #tensorflow #opencv #keras #neural-network

Question:

I am trying to run predictions with an ANN classification model built in TensorFlow, in order to classify pose keypoints produced by MediaPipe. The MediaPipe pose tracker outputs 33 keypoints, each with x, y, and z coordinates, for a total of 99 data points.
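For context, a minimal sketch of that flattening (illustrative only, not part of my pipeline):

    import numpy as np

    # 33 MediaPipe pose landmarks, each with (x, y, z) coordinates
    landmarks = np.zeros((33, 3), dtype=np.float32)

    # Flattened row-major into one feature vector per sample
    features = landmarks.reshape(-1)
    print(features.shape)  # (99,)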

I am training on 4 classes.

This sets up the pose embedding:

    import mediapipe as mp
    import numpy as np
    import tensorflow as tf
    from tensorflow import keras

    mp_pose = mp.solutions.pose


    def get_center_point(landmarks, left_bodypart, right_bodypart):
        """Calculates the center point of the two given landmarks."""
        left = tf.gather(landmarks, left_bodypart.value, axis=1)
        right = tf.gather(landmarks, right_bodypart.value, axis=1)
        center = left * 0.5 + right * 0.5
        return center


    def get_pose_size(landmarks, torso_size_multiplier=2.5):
        """Calculates pose size.

        It is the maximum of two values:
        * Torso size multiplied by `torso_size_multiplier`
        * Maximum distance from pose center to any pose landmark
        """
        # Hips center
        hips_center = get_center_point(landmarks, mp_pose.PoseLandmark.LEFT_HIP,
                                       mp_pose.PoseLandmark.RIGHT_HIP)

        # Shoulders center
        shoulders_center = get_center_point(landmarks, mp_pose.PoseLandmark.LEFT_SHOULDER,
                                            mp_pose.PoseLandmark.RIGHT_SHOULDER)

        # Torso size as the minimum body size
        torso_size = tf.linalg.norm(shoulders_center - hips_center)

        # Pose center
        pose_center_new = get_center_point(landmarks, mp_pose.PoseLandmark.LEFT_HIP,
                                           mp_pose.PoseLandmark.RIGHT_HIP)
        pose_center_new = tf.expand_dims(pose_center_new, axis=1)
        # Broadcast the pose center to the same size as the landmark vector to
        # perform subtraction
        pose_center_new = tf.broadcast_to(pose_center_new,
                                          [tf.size(landmarks) // (33 * 3), 33, 3])

        # Dist to pose center
        d = tf.gather(landmarks - pose_center_new, 0, axis=0,
                      name="dist_to_pose_center")
        # Max dist to pose center
        max_dist = tf.reduce_max(tf.linalg.norm(d, axis=0))

        # Normalize scale
        pose_size = tf.maximum(torso_size * torso_size_multiplier, max_dist)

        return pose_size


    def normalize_pose_landmarks(landmarks):
        """Normalizes the landmarks translation by moving the pose center to
        (0,0) and scaling it to a constant pose size.
        """
        # Move landmarks so that the pose center becomes (0,0)
        pose_center = get_center_point(landmarks, mp_pose.PoseLandmark.LEFT_HIP,
                                       mp_pose.PoseLandmark.RIGHT_HIP)
        pose_center = tf.expand_dims(pose_center, axis=1)
        # Broadcast the pose center to the same size as the landmark vector to
        # perform subtraction
        pose_center = tf.broadcast_to(pose_center,
                                      [tf.size(landmarks) // (33 * 3), 33, 3])
        landmarks = landmarks - pose_center

        # Scale the landmarks to a constant pose size
        pose_size = get_pose_size(landmarks)
        landmarks /= pose_size

        return landmarks


    def landmarks_to_embedding(landmarks_and_scores):
        """Converts the input landmarks into a pose embedding."""
        # Reshape the flat input into a matrix with shape=(33, 3)
        reshaped_inputs = keras.layers.Reshape((33, 3))(landmarks_and_scores)

        # Normalize landmarks 3D
        landmarks = normalize_pose_landmarks(reshaped_inputs[:, :, :3])

        # Flatten the normalized landmark coordinates into a vector
        embedding = keras.layers.Flatten()(landmarks)

        return embedding
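As a quick sanity check (my own smoke test, not part of the original script; it assumes the functions above are importable), pushing a dummy batch through landmarks_to_embedding should yield a (batch, 99) embedding:

    import tensorflow as tf

    # One random dummy sample of 99 values; only the shapes matter here
    dummy = tf.random.normal((1, 99))
    emb = landmarks_to_embedding(dummy)
    print(emb.shape)  # (1, 99)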

Then I build the model and feed the inputs through the embedding:

    import csv
    import cv2
    import itertools
    import numpy as np
    import pandas as pd
    import os
    import sys
    import tempfile
    import tqdm
    import mediapipe as mp
    from matplotlib import pyplot as plt
    from matplotlib.collections import LineCollection
    import tensorflow as tf
    from tensorflow import keras
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
    from poseEmbedding import get_center_point, get_pose_size, normalize_pose_landmarks, landmarks_to_embedding


    def load_pose_landmarks(csv_path):
        # Load the CSV file
        dataframe = pd.read_csv(csv_path)
        df_to_process = dataframe.copy()

        # Extract the list of class names
        classes = df_to_process.pop('class_name').unique()

        # Extract the labels
        y = df_to_process.pop('class_no')

        # Convert the input features and labels into float64 format for training
        X = df_to_process.astype('float64')
        y = keras.utils.to_categorical(y)

        return X, y, classes, dataframe


    csvs_out_train_path = 'train_data.csv'
    csvs_out_test_path = 'test_data.csv'

    # Load the training data
    X, y, class_names, _ = load_pose_landmarks(csvs_out_train_path)

    # Split the training data (X, y) into (X_train, y_train) and (X_val, y_val)
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15)

    X_test, y_test, _, df_test = load_pose_landmarks(csvs_out_test_path)

    mp_pose = mp.solutions.pose

    inputs = tf.keras.Input(shape=(99,))
    embedding = landmarks_to_embedding(inputs)

    layer = keras.layers.Dense(128, activation=tf.nn.relu6)(embedding)
    layer = keras.layers.Dropout(0.5)(layer)
    layer = keras.layers.Dense(64, activation=tf.nn.relu6)(layer)
    layer = keras.layers.Dropout(0.5)(layer)
    outputs = keras.layers.Dense(4, activation="softmax")(layer)

    model = keras.Model(inputs, outputs)
    # model.summary()

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Start training
    history = model.fit(X_train, y_train,
                        epochs=200,
                        batch_size=16,
                        validation_data=(X_val, y_val))
    model.save("complete_epoch_model")

    # Visualize the training history to see whether you're overfitting.
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['TRAIN', 'VAL'], loc='lower right')
    plt.show()

    loss, accuracy = model.evaluate(X_test, y_test)
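One note on files: the script above saves complete_epoch_model, while the inference code further down loads weights_best.hdf5. That checkpoint is written during training by a ModelCheckpoint callback, roughly like this (a sketch; the monitor setting here is my assumption):

    # Assumption: weights_best.hdf5 comes from a checkpoint callback like this one
    checkpoint = keras.callbacks.ModelCheckpoint('weights_best.hdf5',
                                                 monitor='val_accuracy',
                                                 save_best_only=True)
    history = model.fit(X_train, y_train,
                        epochs=200,
                        batch_size=16,
                        validation_data=(X_val, y_val),
                        callbacks=[checkpoint])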

The model summary prints this out:

    Layer (type)                                   Output Shape    Param #  Connected to
    ====================================================================================================
    input_18 (InputLayer)                          [(None, 99)]    0        []
    reshape_17 (Reshape)                           (None, 33, 3)   0        ['input_18[0][0]']
    tf.__operators__.getitem_10 (SlicingOpLambda)  (None, 33, 3)   0        ['reshape_17[0][0]']
    tf.compat.v1.gather_69 (TFOpLambda)            (None, 3)       0        ['tf.__operators__.getitem_10[0][0]']
    tf.compat.v1.gather_70 (TFOpLambda)            (None, 3)       0        ['tf.__operators__.getitem_10[0][0]']
    tf.math.multiply_69 (TFOpLambda)               (None, 3)       0        ['tf.compat.v1.gather_69[0][0]']
    tf.math.multiply_70 (TFOpLambda)               (None, 3)       0        ['tf.compat.v1.gather_70[0][0]']
    tf.__operators__.add_31 (TFOpLambda)           (None, 3)       0        ['tf.math.multiply_69[0][0]', 'tf.math.multiply_70[0][0]']
    tf.compat.v1.size_17 (TFOpLambda)              ()              0        ['tf.__operators__.getitem_10[0][0]']
    tf.expand_dims_17 (TFOpLambda)                 (None, 1, 3)    0        ['tf.__operators__.add_31[0][0]']
    tf.compat.v1.floor_div_17 (TFOpLambda)         ()              0        ['tf.compat.v1.size_17[0][0]']
    tf.broadcast_to_17 (TFOpLambda)                (None, 33, 3)   0        ['tf.expand_dims_17[0][0]', 'tf.compat.v1.floor_div_17[0][0]']
    tf.math.subtract_23 (TFOpLambda)               (None, 33, 3)   0        ['tf.__operators__.getitem_10[0][0]', 'tf.broadcast_to_17[0][0]']
    tf.compat.v1.gather_75 (TFOpLambda)            (None, 3)       0        ['tf.math.subtract_23[0][0]']
    tf.compat.v1.gather_76 (TFOpLambda)            (None, 3)       0        ['tf.math.subtract_23[0][0]']
    tf.math.multiply_75 (TFOpLambda)               (None, 3)       0        ['tf.compat.v1.gather_75[0][0]']
    tf.math.multiply_76 (TFOpLambda)               (None, 3)       0        ['tf.compat.v1.gather_76[0][0]']
    tf.__operators__.add_34 (TFOpLambda)           (None, 3)       0        ['tf.math.multiply_75[0][0]', 'tf.math.multiply_76[0][0]']
    tf.compat.v1.size_18 (TFOpLambda)              ()              0        ['tf.math.subtract_23[0][0]']
    tf.compat.v1.gather_73 (TFOpLambda)            (None, 3)       0        ['tf.math.subtract_23[0][0]']
    tf.compat.v1.gather_74 (TFOpLambda)            (None, 3)       0        ['tf.math.subtract_23[0][0]']
    tf.compat.v1.gather_71 (TFOpLambda)            (None, 3)       0        ['tf.math.subtract_23[0][0]']
    tf.compat.v1.gather_72 (TFOpLambda)            (None, 3)       0        ['tf.math.subtract_23[0][0]']
    tf.expand_dims_18 (TFOpLambda)                 (None, 1, 3)    0        ['tf.__operators__.add_34[0][0]']
    tf.compat.v1.floor_div_18 (TFOpLambda)         ()              0        ['tf.compat.v1.size_18[0][0]']
    tf.math.multiply_73 (TFOpLambda)               (None, 3)       0        ['tf.compat.v1.gather_73[0][0]']
    tf.math.multiply_74 (TFOpLambda)               (None, 3)       0        ['tf.compat.v1.gather_74[0][0]']
    tf.math.multiply_71 (TFOpLambda)               (None, 3)       0        ['tf.compat.v1.gather_71[0][0]']
    tf.math.multiply_72 (TFOpLambda)               (None, 3)       0        ['tf.compat.v1.gather_72[0][0]']
    tf.broadcast_to_18 (TFOpLambda)                (None, 33, 3)   0        ['tf.expand_dims_18[0][0]', 'tf.compat.v1.floor_div_18[0][0]']
    tf.__operators__.add_33 (TFOpLambda)           (None, 3)       0        ['tf.math.multiply_73[0][0]', 'tf.math.multiply_74[0][0]']
    tf.__operators__.add_32 (TFOpLambda)           (None, 3)       0        ['tf.math.multiply_71[0][0]', 'tf.math.multiply_72[0][0]']
    tf.math.subtract_25 (TFOpLambda)               (None, 33, 3)   0        ['tf.math.subtract_23[0][0]', 'tf.broadcast_to_18[0][0]']
    tf.math.subtract_24 (TFOpLambda)               (None, 3)       0        ['tf.__operators__.add_33[0][0]', 'tf.__operators__.add_32[0][0]']
    tf.compat.v1.gather_77 (TFOpLambda)            (33, 3)         0        ['tf.math.subtract_25[0][0]']
    tf.compat.v1.norm_14 (TFOpLambda)              ()              0        ['tf.math.subtract_24[0][0]']
    tf.compat.v1.norm_15 (TFOpLambda)              (3,)            0        ['tf.compat.v1.gather_77[0][0]']
    tf.math.multiply_77 (TFOpLambda)               ()              0        ['tf.compat.v1.norm_14[0][0]']
    tf.math.reduce_max_7 (TFOpLambda)              ()              0        ['tf.compat.v1.norm_15[0][0]']
    tf.math.maximum_7 (TFOpLambda)                 ()              0        ['tf.math.multiply_77[0][0]', 'tf.math.reduce_max_7[0][0]']
    tf.math.truediv_7 (TFOpLambda)                 (None, 33, 3)   0        ['tf.math.subtract_23[0][0]', 'tf.math.maximum_7[0][0]']
    flatten_7 (Flatten)                            (None, 99)      0        ['tf.math.truediv_7[0][0]']
    dense_21 (Dense)                               (None, 128)     12800    ['flatten_7[0][0]']
    dropout_14 (Dropout)                           (None, 128)     0        ['dense_21[0][0]']
    dense_22 (Dense)                               (None, 64)      8256     ['dropout_14[0][0]']
    dropout_15 (Dropout)                           (None, 64)      0        ['dense_22[0][0]']
    dense_23 (Dense)                               (None, 4)       260      ['dropout_15[0][0]']
    ====================================================================================================
    Total params: 21,316
    Trainable params: 21,316
    Non-trainable params: 0

Now when I try to run inference on my webcam, I get the following error from mediapipe and Tensorflow:

    ValueError: Input 0 is incompatible with layer model: expected shape=(None, 99), found shape=(None, 3)

I am not sure how to fix this error, since I could only train with an input shape of 99; TF gave me errors when I tried to compile with shape 3. How do I fix this?

This is my inference code:

    import cv2
    import os
    import tqdm
    import numpy as np
    import logging
    from mediapipe.python.solutions import pose as mp_pose
    from mediapipe.python.solutions import drawing_utils as mp_drawing
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import backend as K
    from tensorflow.keras.utils import CustomObjectScope


    def relu6(x):
        return K.relu(x, max_value=6)


    logging.getLogger().setLevel(logging.CRITICAL)

    cap = cv2.VideoCapture(0)

    model = tf.keras.models.load_model('weights_best.hdf5', compile=True,
                                       custom_objects={"relu6": relu6})

    with mp_pose.Pose() as pose_tracker:
        while cap.isOpened():
            # Get the next frame of the video.
            ret, frame = cap.read()

            # Run the pose tracker.
            imagefirst = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = cv2.flip(imagefirst, 1)
            result = pose_tracker.process(image)
            pose_landmarks = result.pose_landmarks

            # Draw the pose prediction.
            if pose_landmarks is not None:
                mp_drawing.draw_landmarks(
                    image,
                    landmark_list=pose_landmarks,
                    connections=mp_pose.POSE_CONNECTIONS)

            if pose_landmarks is not None:
                # Get landmarks.
                frame_height, frame_width = frame.shape[0], frame.shape[1]
                pose_landmarks = np.array([[lmk.x * frame_width, lmk.y * frame_height, lmk.z * frame_width]
                                           for lmk in pose_landmarks.landmark], dtype=np.float32)
                assert pose_landmarks.shape == (33, 3), 'Unexpected landmarks shape: {}'.format(pose_landmarks.shape)
                prediction = model.predict(pose_landmarks)

            # Show the output frame.
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow('Raw Webcam Feed', image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

    # Close the output video.
    cap.release()
    cv2.destroyAllWindows()

    # Release MediaPipe resources.
    pose_tracker.close()

Answer #1:

Maybe try reshaping pose_landmarks from (33, 3) to (1, 99) after your assert and before you make the prediction:

    import tensorflow as tf

    pose_landmarks = tf.random.normal((33, 3))
    assert pose_landmarks.shape == (33, 3), 'Unexpected landmarks shape: {}'.format(pose_landmarks.shape)

    pose_landmarks = tf.expand_dims(pose_landmarks, axis=0)
    shape = tf.shape(pose_landmarks)
    pose_landmarks = tf.reshape(pose_landmarks, (shape[0], shape[1] * shape[2]))

    tf.print(pose_landmarks.shape)
    TensorShape([1, 99])
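Applied to the inference loop from the question, that means reshaping right before the predict call. A sketch (pose_landmarks is already a NumPy array at that point, so numpy's reshape works just as well; the argmax line is only an illustration of reading the result):

    # Inside the `if pose_landmarks is not None:` block of the inference loop:
    assert pose_landmarks.shape == (33, 3), 'Unexpected landmarks shape: {}'.format(pose_landmarks.shape)
    model_input = pose_landmarks.reshape(1, 99)  # (33, 3) -> (1, 99): one sample, 99 features
    prediction = model.predict(model_input)      # shape (1, 4): softmax over the 4 classes
    class_no = np.argmax(prediction, axis=1)[0]  # index of the most likely class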

Comments:

1. Thanks, I think tf.reshape() is the function I was actually looking for all along. I kept assuming the shape was fixed at training time, but I didn't know it could be changed. Good to know about tf.expand_dims() and tf.shape() as well. I assume those functions are required before you can reshape the data?
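2. Not strictly: tf.expand_dims() and tf.shape() just make the batch dimension explicit without hard-coding sizes. If the target shape is known up front, a single reshape call does the same job (a minimal sketch):

    import numpy as np
    import tensorflow as tf

    pose_landmarks = np.zeros((33, 3), dtype=np.float32)

    # Equivalent one-liners; -1 lets the remaining dimension be inferred
    a = tf.reshape(pose_landmarks, (1, 99))
    b = pose_landmarks.reshape(1, -1)
    print(a.shape, b.shape)  # (1, 99) (1, 99)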