Разные результаты в линейной регрессии Sklearn с одним и тем же набором данных (UCI student performance dataset)

#python #machine-learning #scikit-learn #linear-regression

#python #машинное обучение #scikit-learn #линейная регрессия

Вопрос:

Я получаю разные результаты при прогнозировании значений. Вывод в train_linear_regression_model() правильный, но когда я запускаю fetch_prediction_data() для одного пользователя, он выдает очень неправильные значения.

Код

 def get_dataset_for_linear():
    efficacy_data = models.StudentEfficacy.objects.all()

    features = efficacy_data.values_list('father_education', 'internet_facility', 'study_time',
                                                 'paid_tuition', 'past_failures', 'free_time',
                                                 'extra_curricular_activities',
                                                 'absences', 'past_marks', 'past_marks1', 'class_engagement', 'health')
    labels = efficacy_data.values_list('predictions')

    np_features_avg = np.array(features)
    np_labels_avg = np.array(labels)

    return np_features_avg, np_labels_avg


def train_linear_regression_model():
    test_features = list()
    test_labels = list()

    feature_dataset, label_dataset = get_dataset_for_linear()

    best_accuracy = 0
    for i in range(10000):
        train_features, test_features, train_labels, test_labels = 
            sklearn.model_selection.train_test_split(feature_dataset, label_dataset, test_size=0.9)

        mdl_linear_regression = linear_model.LinearRegression()

        mdl_linear_regression.fit(train_features, train_labels)
        accuracy = mdl_linear_regression.score(test_features, test_labels)

        if best_accuracy < accuracy:
            with open("StudentPerformancePrediction/MachineLearningModels/mdl_linear_regression.pickle", "wb") as regression_file:
                pickle.dump(mdl_linear_regression, regression_file)

            best_accuracy = accuracy

    print(best_accuracy)

    is_debugger_attached = bool(__debug__)
    if is_debugger_attached:
        pickled_model = open("StudentPerformancePrediction/MachineLearningModels/mdl_linear_regression.pickle", "rb")
        mdl_linear_regression = pickle.load(pickled_model)

        predicted_labels = mdl_linear_regression.predict(test_features)

        count = 0
        b = 0
        for x in predicted_labels:
            up = ceil(x)   1
            down = floor(x) - 1
            print(f'ttt test:{down} : {test_labels[b]} : {up}')
            b  = 1
        print(f'{count}/{len(predicted_labels)}')

        print(f'Coefficients: {mdl_linear_regression.coef_}')
        print(f'Mean squared error: {mean_squared_error(test_labels, predicted_labels)}')
        print(f'Coefficient of determination: {r2_score(test_labels, predicted_labels)}')


def fetch_prediction_data(user):
    efficacy_data = models.StudentEfficacy.objects.get(student_efficacy_id=user)

    features = list(model_to_dict(efficacy_data, fields=['father_education', 'internet_facility', 'study_time',
                                                 'paid_tuition', 'past_failures', 'free_time',
                                                 'extra_curricular_activities',
                                                 'absences', 'past_marks', 'past_marks1', 'class_engagement', 'health'])
                    .values())
    labels = list(model_to_dict(efficacy_data, fields=["predictions"]).values())

    feature_dataset = np.array(features)
    label_dataset = np.array(labels)
    feature_dataset = feature_dataset.reshape(1, -1)
    label_dataset = label_dataset.reshape(1, -1)

    pickled_model = open("StudentPerformancePrediction/MachineLearningModels/mdl_linear_regression.pickle", "rb")
    mdl_linear_regression = pickle.load(pickled_model)

    predicted = mdl_linear_regression.predict(feature_dataset)

    return predicted, label_dataset, efficacy_data.student

 

вызов функции для отображения значений

 def test(request):
    LinearRegression.train_linear_regression_model()
    c = ""
    for i in spp.StudentEfficacy.objects.all().order_by('pk'):
        # a, b = LinearRegression.fetch_prediction_data(msh.MySchoolUser.objects.get(auth_user=i.student.auth_user))
        a, b, d = LinearRegression.fetch_prediction_data(i.pk)
        c  = f'<h2>predicted: {a} : {b} : {d}</h2><br />'
    return HttpResponse(f'<h1>{c}</h1>')

 

функция точности и конечной формы вывода: «train_linear_regression_model»

 acc: 0.8281699530791696
output : format = test:[predicted - 2 : actual value : predicted   1]
                         test:102 : [95] : 105
                         test:34 : [50] : 37
                         test:9 : [30] : 12
                         test:50 : [60] : 53
                         test:39 : [55] : 42
                         test:36 : [50] : 39
                         test:41 : [50] : 44
                         test:60 : [70] : 63
                         test:32 : [45] : 35
                         test:-7 : [0] : -4
                         test:65 : [70] : 68
                         test:74 : [75] : 77
                         test:54 : [60] : 57
                         test:33 : [40] : 36
                         test:54 : [60] : 57
                         test:39 : [50] : 42
                         test:30 : [40] : 33
                         test:62 : [65] : 65
                         test:53 : [55] : 56
                         test:21 : [0] : 24
                         test:-13 : [0] : -10
                         test:99 : [90] : 102
                         test:15 : [30] : 18
                         test:44 : [40] : 47
                         test:99 : [90] : 102
                         test:78 : [80] : 81
                         test:73 : [75] : 76
 

вывод из: «fetch_prediction_data (user)»

 format : ["predicted" value] : actualValue : userID
predicted: [[1.19540496]] : [[30]] : Frederick342

predicted: [[-3.81400747]] : [[30]] : Tyler763

predicted: [[9.16808199]] : [[50]] : Kimberly521

predicted: [[26.72044263]] : [[75]] : Jared721

predicted: [[18.3986461]] : [[50]] : Kate8105

predicted: [[34.63203366]] : [[75]] : Rebecca900

predicted: [[18.76076664]] : [[55]] : Julia9895

predicted: [[-3.61118462]] : [[30]] : Anna1079

predicted: [[32.96699407]] : [[95]] : Alfred11685

predicted: [[33.57675547]] : [[75]] : Sienna1258

predicted: [[7.41266665]] : [[45]] : Adison13475

predicted: [[15.34683413]] : [[60]] : Derek1437

predicted: [[30.93347564]] : [[70]] : Alexia15265

predicted: [[15.0282954]] : [[55]] : Miranda1616

predicted: [[29.62933642]] : [[80]] : Arthur17055