#python #scikit-learn #xgboost #grid-search
Вопрос:
Я запускаю RandomizedGridSearchCV в XGBoostClassifier с набором параметров для запуска поиска по сетке, но при запуске кода для RandomizedGridCV программа сталкивается со следующей ошибкой —
C:UsersShikharAppDataRoamingPythonPython38site-packagessklearnmodel_selection_search.py:285: UserWarning: The total space of parameters 6 is smaller than n_iter=500. Running 6 iterations. For exhaustive searches, use GridSearchCV.
warnings.warn(
C:UsersShikharAppDataRoamingPythonPython38site-packagessklearnmodel_selection_search.py:918: UserWarning: One or more of the test scores are non-finite: [nan nan nan nan nan nan]
warnings.warn(
C:ProgramDataAnaconda3libsite-packagesxgboostsklearn.py:888: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].
warnings.warn(label_encoder_deprecation_msg, UserWarning)
---------------------------------------------------------------------------
XGBoostError Traceback (most recent call last)
<ipython-input-38-9e95bcb72de1> in <module>
11 )
12
---> 13 trained_classifiers = classifier.fit(X_train[top_features], y_train)
14
15 best_classifier = trained_classifiers.best_estimator_
~AppDataRoamingPythonPython38site-packagessklearnutilsvalidation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
~AppDataRoamingPythonPython38site-packagessklearnmodel_selection_search.py in fit(self, X, y, groups, **fit_params)
878 refit_start_time = time.time()
879 if y is not None:
--> 880 self.best_estimator_.fit(X, y, **fit_params)
881 else:
882 self.best_estimator_.fit(X, **fit_params)
C:ProgramDataAnaconda3libsite-packagesxgboostcore.py in inner_f(*args, **kwargs)
420 for k, arg in zip(sig.parameters, args):
421 kwargs[k] = arg
--> 422 return f(**kwargs)
423
424 return inner_f
C:ProgramDataAnaconda3libsite-packagesxgboostsklearn.py in fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, feature_weights, callbacks)
907 eval_group=None, label_transform=label_transform)
908
--> 909 self._Booster = train(xgb_options, train_dmatrix,
910 self.get_num_boosting_rounds(),
911 evals=evals,
C:ProgramDataAnaconda3libsite-packagesxgboosttraining.py in train(params, dtrain, num_boost_round, evals, obj, feval, maximize, early_stopping_rounds, evals_result, verbose_eval, xgb_model, callbacks)
225 Booster : a trained booster model
226 """
--> 227 bst = _train_internal(params, dtrain,
228 num_boost_round=num_boost_round,
229 evals=evals,
C:ProgramDataAnaconda3libsite-packagesxgboosttraining.py in _train_internal(params, dtrain, num_boost_round, evals, obj, feval, xgb_model, callbacks, evals_result, maximize, verbose_eval, early_stopping_rounds)
100 # Skip the first update if it is a recovery step.
101 if version % 2 == 0:
--> 102 bst.update(dtrain, i, obj)
103 bst.save_rabit_checkpoint()
104 version = 1
C:ProgramDataAnaconda3libsite-packagesxgboostcore.py in update(self, dtrain, iteration, fobj)
1278
1279 if fobj is None:
-> 1280 _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
1281 ctypes.c_int(iteration),
1282 dtrain.handle))
C:ProgramDataAnaconda3libsite-packagesxgboostcore.py in _check_call(ret)
187 """
188 if ret != 0:
--> 189 raise XGBoostError(py_str(_LIB.XGBGetLastError()))
190
191
XGBoostError: [22:30:15] C:UsersAdministratorworkspacexgboost-win64_release_1.3.0srcgbmgbm.cc:26: Unknown gbm type g
версия xgboost 1.3.3
Любой другой способ сделать то же самое будет высоко оценен.
В то время как я заменяю RandomizedGridSearchCV на GridSearchCV, программа запускается без каких-либо ошибок.
Код:
import xgboost as xgb
model = xgb.XGBClassifier()
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
TRAINING_PARAMETERS = {
"n_estimators" : [500, 300, 700],
"learning_rate": [0.001, 0.1, 1],
"subsample" : [.7, 0.8, 0.9],
"max_depth" : [3, 4, 5]
}
classifier = RandomizedSearchCV(
estimator = model,
param_distributions = TRAINING_PARAMETERS,
n_jobs = 16,
n_iter = 500,
cv = 3,
#scoring = "roc_auc",
#verbose = 15
)
trained_classifiers = classifier.fit(X_train[top_features], y_train)
Данные состоят из 122 столбцов признаков и размера выборки 500 . Цель является двоичной (0 и 1).
Комментарии:
1. Пожалуйста, укажите значения в
TRAINING_PARAMETERS
. В идеале также предоставьте пример набора данных, который обеспечивает такое же поведение.2. @BenReiniger Я включил описание
TRAINING_PARAMETERS
и данные.