Не удаётся получить срез DatetimeIndex с помощью объектов datetime

#python #pandas #datetime #slice

Вопрос:

Я создаю DataFrame, индекс которого строится из дат с помощью pd.to_datetime, поэтому индекс получает тип DatetimeIndex.

 from datetime import datetime from datetime import timedelta from dateutil.parser import parse import pandas as pd   datestr=["2021/2/3","2021/01/6","2021/2/4","2021/2/7","2021/2/7","2021/2/9"] time_data_2=pd.DataFrame({"data_1":[0.3,0.4,0.9,0.5,0.3,0.3],"data_2":[1,2,3,4,5,6]},index=pd.to_datetime(datestr))  

Я хочу получить срез этого фрейма данных с помощью объектов datetime, но при этом возникает ошибка.

 time_data_2.loc[datetime(2021,2,1):datetime(2021,2,7),]  
 --------------------------------------------------------------------------- KeyError Traceback (most recent call last) ~anaconda3libsite-packagespandascoreindexesbase.py in get_loc(self, key, method, tolerance)  3079 try: -gt; 3080 return self._engine.get_loc(casted_key)  3081 except KeyError as err:  pandas_libsindex.pyx in pandas._libs.index.DatetimeEngine.get_loc()  pandas_libsindex.pyx in pandas._libs.index.DatetimeEngine.get_loc()  pandas_libsindex.pyx in pandas._libs.index.IndexEngine._get_loc_duplicates()  pandas_libsindex_class_helper.pxi in pandas._libs.index.Int64Engine._maybe_get_bool_indexer()  pandas_libsindex.pyx in pandas._libs.index.IndexEngine._unpack_bool_indexer()  KeyError: 1612137600000000000  The above exception was the direct cause of the following exception:  KeyError Traceback (most recent call last) ~anaconda3libsite-packagespandascoreindexesdatetimes.py in get_loc(self, key, method, tolerance)  685 try: --gt; 686 return Index.get_loc(self, key, method, tolerance)  687 except KeyError as err:  ~anaconda3libsite-packagespandascoreindexesbase.py in get_loc(self, key, method, tolerance)  3081 except KeyError as err: -gt; 3082 raise KeyError(key) from err  3083   KeyError: Timestamp('2021-02-01 00:00:00')  The above exception was the direct cause of the following exception:  KeyError Traceback (most recent call last) lt;ipython-input-45-db7af75f07acgt; in lt;modulegt;  1 time_data_2=pd.DataFrame({"data_1":[0.3,0.4,0.9,0.5,0.3,0.3],"data_2":[1,2,3,4,5,6]},index=pd.to_datetime(datestr)) ----gt; 2 time_data_2.loc[datetime(2021,2,1):datetime(2021,2,7),]  ~anaconda3libsite-packagespandascoreindexing.py in __getitem__(self, key)  887 # AttributeError for IntervalTree get_value  888 return self.obj._get_value(*key, takeable=self._takeable) --gt; 889 return self._getitem_tuple(key)  890 else:  891 # we by definition only have the 0th axis  ~anaconda3libsite-packagespandascoreindexing.py in _getitem_tuple(self, tup)  1067 return self._multi_take(tup)  
1068  -gt; 1069 return self._getitem_tuple_same_dim(tup)  1070   1071 def _get_label(self, label, axis: int):  ~anaconda3libsite-packagespandascoreindexing.py in _getitem_tuple_same_dim(self, tup)  773 continue  774  --gt; 775 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)  776 # We should never have retval.ndim lt; self.ndim, as that should  777 # be handled by the _getitem_lowerdim call above.  ~anaconda3libsite-packagespandascoreindexing.py in _getitem_axis(self, key, axis)  1100 if isinstance(key, slice):  1101 self._validate_key(key, axis) -gt; 1102 return self._get_slice_axis(key, axis=axis)  1103 elif com.is_bool_indexer(key):  1104 return self._getbool_axis(key, axis=axis)  ~anaconda3libsite-packagespandascoreindexing.py in _get_slice_axis(self, slice_obj, axis)  1134   1135 labels = obj._get_axis(axis) -gt; 1136 indexer = labels.slice_indexer(  1137 slice_obj.start, slice_obj.stop, slice_obj.step, kind="loc"  1138 )  ~anaconda3libsite-packagespandascoreindexesdatetimes.py in slice_indexer(self, start, end, step, kind)  782   783 try: --gt; 784 return Index.slice_indexer(self, start, end, step, kind=kind)  785 except KeyError:  786 # For historical reasons DatetimeIndex by default supports  ~anaconda3libsite-packagespandascoreindexesbase.py in slice_indexer(self, start, end, step, kind)  5275 slice(1, 3, None)  5276 """ -gt; 5277 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)  5278   5279 # return a slice  ~anaconda3libsite-packagespandascoreindexesbase.py in slice_locs(self, start, end, step, kind)  5474 start_slice = None  5475 if start is not None: -gt; 5476 start_slice = self.get_slice_bound(start, "left", kind)  5477 if start_slice is None:  5478 start_slice = 0  ~anaconda3libsite-packagespandascoreindexesbase.py in get_slice_bound(self, label, side, kind)  5394 except ValueError:  5395 # raise the original KeyError -gt; 5396 raise err  5397   5398 if isinstance(slc, np.ndarray):  
~anaconda3libsite-packagespandascoreindexesbase.py in get_slice_bound(self, label, side, kind)  5388 # we need to look up the label  5389 try: -gt; 5390 slc = self.get_loc(label)  5391 except KeyError as err:  5392 try:  ~anaconda3libsite-packagespandascoreindexesdatetimes.py in get_loc(self, key, method, tolerance)  686 return Index.get_loc(self, key, method, tolerance)  687 except KeyError as err: --gt; 688 raise KeyError(orig_key) from err  689   690 def _maybe_cast_for_get_loc(self, key) -gt; Timestamp:  KeyError: Timestamp('2021-02-01 00:00:00')  

Комментарии:

1. Вы можете просто использовать строки: time_data_2.loc["2021-02-01":"2021-02-07",]

2. Проблема здесь в том, что ваш индекс состоит из объектов pandas.Timestamp, а вы пытаетесь делать срез по объектам datetime.datetime.

3. Я знаю об этом, но я пробовал вызвать .timestamp() у объекта datetime и использовать результат для среза. Это всё равно не работает.