#python #linear-regression #statsmodels #valueerror
Вопрос:
Эта проблема возникает при запуске множественной регрессии в цикле for. Один из примеров этого представлен ниже: переменная y выглядит следующим образом:
0 NaN
1 NaN
2 3
3 NaN
4 2
5 NaN
6 NaN
7 NaN
8 4
9 NaN
Name: 1794, dtype: float64
Переменная x:
0 1 2 3 4
0 NaN NaN NaN 24.1071 -143.421717
1 NaN NaN NaN 18.1163 -144.781828
2 NaN NaN NaN 19.2395 -138.566242
3 3 0.0 0.0 21.2295 -153.737705
4 NaN NaN NaN 21.1719 -163.109375
5 2 0.0 0.0 23.6562 -168.140625
6 NaN NaN NaN 26.9867 -125.666667
7 NaN NaN NaN 28.1184 -107.486842
8 NaN NaN NaN 30.5556 -125.416667
9 4 0.0 0.0 28.0714 -136.071429
Код регрессии:
reg = sm.OLS(y, sm.add_constant(x, has_constant = "add"), missing="drop").fit()
Сообщение об ошибке:
ValueError Traceback (most recent call last)
<ipython-input-186-ff72de5301d3> in <module>
----> 1 reg = sm.OLS(y, sm.add_constant(x, has_constant = "add"), missing="drop").fit()
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\regression\linear_model.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
870 def __init__(self, endog, exog=None, missing='none', hasconst=None,
871 **kwargs):
--> 872 super(OLS, self).__init__(endog, exog, missing=missing,
873 hasconst=hasconst, **kwargs)
874 if "weights" in self._init_keys:
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\regression\linear_model.py in __init__(self, endog, exog, weights, missing, hasconst, **kwargs)
701 else:
702 weights = weights.squeeze()
--> 703 super(WLS, self).__init__(endog, exog, missing=missing,
704 weights=weights, hasconst=hasconst, **kwargs)
705 nobs = self.exog.shape[0]
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\regression\linear_model.py in __init__(self, endog, exog, **kwargs)
188 """
189 def __init__(self, endog, exog, **kwargs):
--> 190 super(RegressionModel, self).__init__(endog, exog, **kwargs)
191 self._data_attr.extend(['pinv_wexog', 'weights'])
192
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\base\model.py in __init__(self, endog, exog, **kwargs)
235
236 def __init__(self, endog, exog=None, **kwargs):
--> 237 super(LikelihoodModel, self).__init__(endog, exog, **kwargs)
238 self.initialize()
239
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\base\model.py in __init__(self, endog, exog, **kwargs)
75 missing = kwargs.pop('missing', 'none')
76 hasconst = kwargs.pop('hasconst', None)
---> 77 self.data = self._handle_data(endog, exog, missing, hasconst,
78 **kwargs)
79 self.k_constant = self.data.k_constant
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\base\model.py in _handle_data(self, endog, exog, missing, hasconst, **kwargs)
99
100 def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
--> 101 data = handle_data(endog, exog, missing, hasconst, **kwargs)
102 # kwargs arrays could have changed, easier to just attach here
103 for key in kwargs:
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\base\data.py in handle_data(endog, exog, missing, hasconst, **kwargs)
670
671 klass = handle_data_class_factory(endog, exog)
--> 672 return klass(endog, exog=exog, missing=missing, hasconst=hasconst,
673 **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\base\data.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
85 self.const_idx = None
86 self.k_constant = 0
---> 87 self._handle_constant(hasconst)
88 self._check_integrity()
89 self._cache = {}
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\base\data.py in _handle_constant(self, hasconst)
129 # detect where the constant is
130 check_implicit = False
--> 131 exog_max = np.max(self.exog, axis=0)
132 if not np.isfinite(exog_max).all():
133 raise MissingDataError('exog contains inf or nans')
<__array_function__ internals> in amax(*args, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in amax(a, axis, out, keepdims, initial, where)
2703 5
2704 """
-> 2705 return _wrapreduction(a, np.maximum, 'max', axis, None, out,
2706 keepdims=keepdims, initial=initial, where=where)
2707
C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in _wrapreduction(obj, ufunc, method, axis, dtype, out, **kwargs)
85 return reduction(axis=axis, out=out, **passkwargs)
86
---> 87 return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
88
89
ValueError: zero-size array to reduction operation maximum which has no identity
Я знаю, что есть похожие вопросы об этой ошибке, но я не смог найти точно такой же случай, и ничего из предложенного в других вопросах не помогло.
Комментарии:
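1. Похоже, что после удаления всех строк, содержащих хотя бы один NaN в переменных y или x, не осталось ни одного наблюдения. В приведённом примере y определён только в строках 2, 4 и 8, а столбцы 0–2 переменной x — только в строках 3, 5 и 9, поэтому при missing="drop" отбрасываются все строки, и матрица exog оказывается пустой.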