#python #pandas #datetime #slice
Вопрос:
Я создаю фрейм данных, индексируемый объектами datetime, и его индекс становится объектом DatetimeIndex.
from datetime import datetime from datetime import timedelta from dateutil.parser import parse import pandas as pd datestr=["2021/2/3","2021/01/6","2021/2/4","2021/2/7","2021/2/7","2021/2/9"] time_data_2=pd.DataFrame({"data_1":[0.3,0.4,0.9,0.5,0.3,0.3],"data_2":[1,2,3,4,5,6]},index=pd.to_datetime(datestr))
Я хочу сделать срез этого фрейма с помощью объектов datetime, но при этом возникает ошибка.
time_data_2.loc[datetime(2021,2,1):datetime(2021,2,7),]
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance) 3079 try: -> 3080 return self._engine.get_loc(casted_key) 3081 except KeyError as err: pandas\_libs\index.pyx in pandas._libs.index.DatetimeEngine.get_loc() pandas\_libs\index.pyx in pandas._libs.index.DatetimeEngine.get_loc() pandas\_libs\index.pyx in pandas._libs.index.IndexEngine._get_loc_duplicates() pandas\_libs\index_class_helper.pxi in pandas._libs.index.Int64Engine._maybe_get_bool_indexer() pandas\_libs\index.pyx in pandas._libs.index.IndexEngine._unpack_bool_indexer() KeyError: 1612137600000000000 The above exception was the direct cause of the following exception: KeyError Traceback (most recent call last) ~\anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_loc(self, key, method, tolerance) 685 try: --> 686 return Index.get_loc(self, key, method, tolerance) 687 except KeyError as err: ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance) 3081 except KeyError as err: -> 3082 raise KeyError(key) from err 3083 KeyError: Timestamp('2021-02-01 00:00:00') The above exception was the direct cause of the following exception: KeyError Traceback (most recent call last) <ipython-input-45-db7af75f07ac> in <module> 1 time_data_2=pd.DataFrame({"data_1":[0.3,0.4,0.9,0.5,0.3,0.3],"data_2":[1,2,3,4,5,6]},index=pd.to_datetime(datestr)) ----> 2 time_data_2.loc[datetime(2021,2,1):datetime(2021,2,7),] ~\anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key) 887 # AttributeError for IntervalTree get_value 888 return self.obj._get_value(*key, takeable=self._takeable) --> 889 return self._getitem_tuple(key) 890 else: 891 # we by definition only have the 0th axis ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup) 1067 return self._multi_take(tup) 1068 -> 1069 return
self._getitem_tuple_same_dim(tup) 1070 1071 def _get_label(self, label, axis: int): ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple_same_dim(self, tup) 773 continue 774 --> 775 retval = getattr(retval, self.name)._getitem_axis(key, axis=i) 776 # We should never have retval.ndim < self.ndim, as that should 777 # be handled by the _getitem_lowerdim call above. ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis) 1100 if isinstance(key, slice): 1101 self._validate_key(key, axis) -> 1102 return self._get_slice_axis(key, axis=axis) 1103 elif com.is_bool_indexer(key): 1104 return self._getbool_axis(key, axis=axis) ~\anaconda3\lib\site-packages\pandas\core\indexing.py in _get_slice_axis(self, slice_obj, axis) 1134 1135 labels = obj._get_axis(axis) -> 1136 indexer = labels.slice_indexer( 1137 slice_obj.start, slice_obj.stop, slice_obj.step, kind="loc" 1138 ) ~\anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in slice_indexer(self, start, end, step, kind) 782 783 try: --> 784 return Index.slice_indexer(self, start, end, step, kind=kind) 785 except KeyError: 786 # For historical reasons DatetimeIndex by default supports ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in slice_indexer(self, start, end, step, kind) 5275 slice(1, 3, None) 5276 """ -> 5277 start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind) 5278 5279 # return a slice ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in slice_locs(self, start, end, step, kind) 5474 start_slice = None 5475 if start is not None: -> 5476 start_slice = self.get_slice_bound(start, "left", kind) 5477 if start_slice is None: 5478 start_slice = 0 ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind) 5394 except ValueError: 5395 # raise the original KeyError -> 5396 raise err 5397 5398 if isinstance(slc, np.ndarray): ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_slice_bound(self, label, side, kind)
5388 # we need to look up the label 5389 try: -> 5390 slc = self.get_loc(label) 5391 except KeyError as err: 5392 try: ~\anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_loc(self, key, method, tolerance) 686 return Index.get_loc(self, key, method, tolerance) 687 except KeyError as err: --> 688 raise KeyError(orig_key) from err 689 690 def _maybe_cast_for_get_loc(self, key) -> Timestamp: KeyError: Timestamp('2021-02-01 00:00:00')
Комментарии:
1. Вы можете просто использовать строки:
time_data_2.loc["2021-02-01":"2021-02-07",]
2. Проблема здесь в том, что ваш индекс состоит из объектов pandas.Timestamp, а вы пытаетесь сделать срез с помощью объектов datetime.datetime.
3. Я знаю это, но я попытался добавить .timestamp() после объекта datetime и использовать его для среза. Это всё равно не работает.