Как передать аргументы f1_score в make_scorer в scikit-learn для использования с cross_val_score?

#python #scikit-learn #cross-validation #make-scorer

#python #scikit-learn #перекрестная проверка #make-scorer

Вопрос:

У меня задача многоклассовой классификации (с большим числом меток), и я хочу использовать метрику F1 с параметром average='weighted'.

Однако кое-что я делаю неправильно. Вот мой код:

 from sklearn.metrics import f1_score

from sklearn.metrics import make_scorer

f1 = make_scorer(f1_score,  {'average' : 'weighted'})

np.mean(cross_val_score(model, X, y, cv=8, n_jobs=-1, scoring = f1))

---------------------------------------------------------------------------
_RemoteTraceback                          Traceback (most recent call last)
_RemoteTraceback: 
"""
Traceback (most recent call last):
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagesjoblibexternalslokyprocess_executor.py", line 418, in _process_worker
    r = call_item()
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagesjoblibexternalslokyprocess_executor.py", line 272, in __call__
    return self.fn(*self.args, **self.kwargs)
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagesjoblib_parallel_backends.py", line 608, in __call__
    return self.func(*args, **kwargs)
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagesjoblibparallel.py", line 256, in __call__
    for func, args, kwargs in self.items]
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagesjoblibparallel.py", line 256, in <listcomp>
    for func, args, kwargs in self.items]
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagessklearnmodel_selection_validation.py", line 560, in _fit_and_score
    test_scores = _score(estimator, X_test, y_test, scorer)
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagessklearnmodel_selection_validation.py", line 607, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagessklearnmetrics_scorer.py", line 88, in __call__
    *args, **kwargs)
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagessklearnmetrics_scorer.py", line 213, in _score
    **self._kwargs)
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagessklearnutilsvalidation.py", line 73, in inner_f
    return f(**kwargs)
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagessklearnmetrics_classification.py", line 1047, in f1_score
    zero_division=zero_division)
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagessklearnutilsvalidation.py", line 73, in inner_f
    return f(**kwargs)
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagessklearnmetrics_classification.py", line 1175, in fbeta_score
    zero_division=zero_division)
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagessklearnutilsvalidation.py", line 73, in inner_f
    return f(**kwargs)
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagessklearnmetrics_classification.py", line 1434, in precision_recall_fscore_support
    pos_label)
  File "C:UsersAlienwareAnaconda3envstf2libsite-packagessklearnmetrics_classification.py", line 1265, in _check_set_wise_labels
    % (y_type, average_options))
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].
"""

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
<ipython-input-48-0323d7b23fbc> in <module>
----> 1 np.mean(cross_val_score(model, X, y, cv=8, n_jobs=-1, scoring = f1))

~Anaconda3envstf2libsite-packagessklearnutilsvalidation.py in inner_f(*args, **kwargs)
     71                           FutureWarning)
     72         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 73         return f(**kwargs)
     74     return inner_f
     75 

~Anaconda3envstf2libsite-packagessklearnmodel_selection_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
    404                                 fit_params=fit_params,
    405                                 pre_dispatch=pre_dispatch,
--> 406                                 error_score=error_score)
    407     return cv_results['test_score']
    408 

~Anaconda3envstf2libsite-packagessklearnutilsvalidation.py in inner_f(*args, **kwargs)
     71                           FutureWarning)
     72         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 73         return f(**kwargs)
     74     return inner_f
     75 

~Anaconda3envstf2libsite-packagessklearnmodel_selection_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
    246             return_times=True, return_estimator=return_estimator,
    247             error_score=error_score)
--> 248         for train, test in cv.split(X, y, groups))
    249 
    250     zipped_scores = list(zip(*scores))

~Anaconda3envstf2libsite-packagesjoblibparallel.py in __call__(self, iterable)
   1015 
   1016             with self._backend.retrieval_context():
-> 1017                 self.retrieve()
   1018             # Make sure that we get a last message telling us we are done
   1019             elapsed_time = time.time() - self._start_time

~Anaconda3envstf2libsite-packagesjoblibparallel.py in retrieve(self)
    907             try:
    908                 if getattr(self._backend, 'supports_timeout', False):
--> 909                     self._output.extend(job.get(timeout=self.timeout))
    910                 else:
    911                     self._output.extend(job.get())

~Anaconda3envstf2libsite-packagesjoblib_parallel_backends.py in wrap_future_result(future, timeout)
    560         AsyncResults.get from multiprocessing."""
    561         try:
--> 562             return future.result(timeout=timeout)
    563         except LokyTimeoutError:
    564             raise TimeoutError()

~Anaconda3envstf2libconcurrentfutures_base.py in result(self, timeout)
    433                 raise CancelledError()
    434             elif self._state == FINISHED:
--> 435                 return self.__get_result()
    436             else:
    437                 raise TimeoutError()

~Anaconda3envstf2libconcurrentfutures_base.py in __get_result(self)
    382     def __get_result(self):
    383         if self._exception:
--> 384             raise self._exception
    385         else:
    386             return self._result

ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. 
  

Ответ №1:

Если посмотреть на пример из документации, видно, что параметры функции оценки (здесь: f1_score) нужно передавать в make_scorer не в виде словаря (dict), а как именованные аргументы:

 f1 = make_scorer(f1_score, average='weighted')

np.mean(cross_val_score(model, X, y, cv=8, n_jobs=-1, scoring=f1))