#python #pandas #dataframe #pickle
#python #pandas #датафрейм #pickle
Вопрос:
Я пытаюсь загрузить pickle-файл в датафрейм. Я пробовал задавать через setrecursionlimit
значения от 1500 до 5000, но ошибка по-прежнему возникает.
Есть ли какой-либо другой способ получить доступ к файлу pickle и поместить его в фрейм данных?
import pandas as pd
import numpy as np
import sys
# Raise the interpreter's recursion limit to 5000 (the asker reports that
# values between 1500 and 5000 do not avoid the error).
sys.setrecursionlimit(5000)
# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
df = pd.read_pickle("data.pkl",compression=None)
# Show the first rows; this is the call that raises RecursionError in the
# traceback that follows.
df.head()
Весь журнал:
---------------------------------------------------------------------------
RecursionError Traceback (most recent call last)
<ipython-input-4-c42a15b2c7cf> in <module>
----> 1 df.head()
/opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in head(self, n)
4787 """
4788
-> 4789 return self.iloc[:n]
4790
4791 def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries:
/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in __getitem__(self, key)
1766
1767 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1768 return self._getitem_axis(maybe_callable, axis=axis)
1769
1770 def _is_scalar_access(self, key: Tuple):
/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
2116 def _getitem_axis(self, key, axis: int):
2117 if isinstance(key, slice):
-> 2118 return self._get_slice_axis(key, axis=axis)
2119
2120 if isinstance(key, list):
/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in _get_slice_axis(self, slice_obj, axis)
1747 return obj.copy(deep=False)
1748
-> 1749 indexer = self._convert_slice_indexer(slice_obj, axis)
1750 return self._slice(indexer, axis=axis, kind="iloc")
1751
/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in _convert_slice_indexer(self, key, axis)
743 def _convert_slice_indexer(self, key: slice, axis: int):
744 # if we are accessing via lowered dim, use the last dim
--> 745 ax = self.obj._get_axis(min(axis, self.ndim - 1))
746 return ax._convert_slice_indexer(key, kind=self.name)
747
pandas/_libs/indexing.pyx in pandas._libs.indexing._NDFrameIndexerBase.ndim.__get__()
/opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in __getattr__(self, name)
5270 return object.__getattribute__(self, name)
5271 else:
-> 5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5273 return self[name]
5274 return object.__getattribute__(self, name)
... last 1 frames repeated, from the frame below ...
/opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in __getattr__(self, name)
5270 return object.__getattribute__(self, name)
5271 else:
-> 5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5273 return self[name]
5274 return object.__getattribute__(self, name)
RecursionError: maximum recursion depth exceeded
Комментарии:
1. Что находится в pickle-файле? Как он был создан? Вы сами его создали? Думаю, ответить на этот вопрос будет очень сложно без дополнительной информации о содержимом этого pickle-файла.
Ответ №1:
Вы пытаетесь прочитать фрейм данных со столбцами, которые содержат другие фреймы данных?
Если это так, и все строки содержат одну и ту же копию фрейма данных, вы можете рассмотреть возможность преобразования столбцов внутреннего фрейма в фактические столбцы во внешнем фрейме.
Вы можете найти столбцы, содержащие фреймы, следующим образом:
# List the object-dtype columns of df whose first element is a DataFrame.
[col for col in df.select_dtypes(object).columns if isinstance(df[col].iloc[0], pd.DataFrame)]
Чтобы преобразовать столбцы внутреннего фрейма в фактические столбцы, вы можете попробовать что-то вроде этого:
from typing import List
import pandas as pd
def find_frame_cols(df: pd.DataFrame) -> List[str]:
    """Find columns in a DataFrame that hold DataFrames.

    Only the first row of each object-dtype column is inspected, so a
    column is reported when its first element is a DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to search for columns having DataFrames.

    Returns
    -------
    List[str]
        List of column names holding DataFrames. Empty when ``df`` has no
        rows or no columns.
    """
    # With zero rows there is no first element to inspect; ``.iloc[0]``
    # would raise IndexError, so report that nothing was found instead.
    if df.empty:
        return []
    return [
        col for col in df.select_dtypes(object).columns
        if isinstance(df[col].iloc[0], pd.DataFrame)
    ]
def frame_to_cols(df: pd.DataFrame, drop_after: bool = True) -> pd.DataFrame:
    """Convert columns holding pandas DataFrame objects to columns.

    For every column found by ``find_frame_cols``, the DataFrame stored in
    the first row is expanded into regular columns named
    ``"<outer column>_<inner column>"``. Only that first-row frame is used,
    and its values are aligned to ``df`` by index when assigned. Columns of
    the inner frame that themselves hold DataFrames are skipped.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to convert. It is not modified.
    drop_after : bool, default=True
        Whether to drop the columns with DataFrame objects, after conversion.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns from inner frame.
    """
    frame_cols = find_frame_cols(df)
    # Work on a copy so the new columns are never added to the caller's
    # frame as a side effect.
    result = df.copy()
    for col in frame_cols:
        inner_frame = result[col].iloc[0].infer_objects()
        # Leave out nested frame columns; note that Index.difference returns
        # the remaining labels sorted.
        flat_cols = inner_frame.columns.difference(find_frame_cols(inner_frame))
        flattened = inner_frame[flat_cols].rename(
            columns={inner_col: f"{col}_{inner_col}" for inner_col in flat_cols}
        )
        result[flattened.columns] = flattened
    if drop_after:
        result = result.drop(columns=frame_cols, errors="ignore")
    return result
# Expand the DataFrame-valued columns of df and bind the result to clean_df.
clean_df = frame_to_cols(df)
Пример:
import numpy as np
import pandas as pd
# Build a one-column frame "x" holding 50 random floats.
data = pd.DataFrame(np.random.random(50), columns=['x'])
# Put a reference to the frame itself in every row of a new "df" column; this
# self-reference is what the RecursionError shown afterwards demonstrates.
data['df'] = [data] * data.shape[0]
Попытка вывести data
приводит к следующему результату:
>>> data
---------------------------------------------------------------------------
RecursionError Traceback (most recent call last)
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/IPython/lib/pretty.py in pretty(self, obj)
392 if cls is not object
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/core/frame.py in __repr__(self)
742 else:
743 width = None
--> 744 self.to_string(
745 buf=buf,
746 max_rows=max_rows,
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/core/frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, min_rows, max_cols, show_dimensions, decimal, line_width, max_colwidth, encoding)
881 line_width=line_width,
882 )
--> 883 return formatter.to_string(buf=buf, encoding=encoding)
884
885 # ----------------------------------------------------------------------
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in to_string(self, buf, encoding)
919 encoding: Optional[str] = None,
920 ) -> Optional[str]:
--> 921 return self.get_result(buf=buf, encoding=encoding)
922
923 def to_latex(
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in get_result(self, buf, encoding)
518 """
519 with self.get_buffer(buf, encoding=encoding) as f:
--> 520 self.write_result(buf=f)
521 if buf is None:
522 return f.getvalue()
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in write_result(self, buf)
832 else:
833
--> 834 strcols = self._to_str_columns()
835 if self.line_width is None: # no need to wrap around just print
836 # the whole frame
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _to_str_columns(self)
768 col_space.get(c, 0), *(self.adj.len(x) for x in cheader)
769 )
--> 770 fmt_values = self._format_col(i)
771 fmt_values = _make_fixed_width(
772 fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format_col(self, i)
952 frame = self.tr_frame
953 formatter = self._get_formatter(i)
--> 954 return format_array(
955 frame.iloc[:, i]._values,
956 formatter,
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in format_array(values, formatter, float_format, na_rep, digits, space, justify, decimal, leading_space, quoting)
1177 )
1178
-> 1179 return fmt_obj.get_result()
1180
1181
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in get_result(self)
1208
1209 def get_result(self) -> List[str]:
-> 1210 fmt_values = self._format_strings()
1211 return _make_fixed_width(fmt_values, self.justify)
1212
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format_strings(self)
1275 else:
1276 tpl = " {v}"
-> 1277 fmt_values.append(tpl.format(v=_format(v)))
1278
1279 return fmt_values
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format(x)
1246 return self.na_rep
1247 elif isinstance(x, PandasObject):
-> 1248 return str(x)
1249 else:
1250 # object dtype
... last 11 frames repeated, from the frame below ...
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/core/frame.py in __repr__(self)
742 else:
743 width = None
--> 744 self.to_string(
745 buf=buf,
746 max_rows=max_rows,
RecursionError: maximum recursion depth exceeded in __instancecheck__
---------------------------------------------------------------------------
RecursionError Traceback (most recent call last)
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/core/frame.py in _repr_html_(self)
796 render_links=False,
797 )
--> 798 return formatter.to_html(notebook=True)
799 else:
800 return None
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in to_html(self, buf, encoding, classes, notebook, border)
986
987 Klass = NotebookFormatter if notebook else HTMLFormatter
--> 988 return Klass(self, classes=classes, border=border).get_result(
989 buf=buf, encoding=encoding
990 )
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in get_result(self, buf, encoding)
518 """
519 with self.get_buffer(buf, encoding=encoding) as f:
--> 520 self.write_result(buf=f)
521 if buf is None:
522 return f.getvalue()
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in write_result(self, buf)
201
202 def write_result(self, buf: IO[str]) -> None:
--> 203 buffer_put_lines(buf, self.render())
204
205 def _write_table(self, indent: int = 0) -> None:
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in render(self)
605 self.write("<div>")
606 self.write_style()
--> 607 super().render()
608 self.write("</div>")
609 return self.elements
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in render(self)
190
191 def render(self) -> List[str]:
--> 192 self._write_table()
193
194 if self.should_show_dimensions:
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in _write_table(self, indent)
231 self._write_header(indent + self.indent_delta)
232
--> 233 self._write_body(indent + self.indent_delta)
234
235 self.write("</table>", indent)
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in _write_body(self, indent)
377 def _write_body(self, indent: int) -> None:
378 self.write("<tbody>", indent)
--> 379 fmt_values = self._get_formatted_values()
380
381 # write values
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in _get_formatted_values(self)
569
570 def _get_formatted_values(self) -> Dict[int, List[str]]:
--> 571 return {i: self.fmt._format_col(i) for i in range(self.ncols)}
572
573 def _get_columns_formatted_values(self) -> List[str]:
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in <dictcomp>(.0)
569
570 def _get_formatted_values(self) -> Dict[int, List[str]]:
--> 571 return {i: self.fmt._format_col(i) for i in range(self.ncols)}
572
573 def _get_columns_formatted_values(self) -> List[str]:
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format_col(self, i)
952 frame = self.tr_frame
953 formatter = self._get_formatter(i)
--> 954 return format_array(
955 frame.iloc[:, i]._values,
956 formatter,
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in format_array(values, formatter, float_format, na_rep, digits, space, justify, decimal, leading_space, quoting)
1177 )
1178
-> 1179 return fmt_obj.get_result()
1180
1181
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in get_result(self)
1208
1209 def get_result(self) -> List[str]:
-> 1210 fmt_values = self._format_strings()
1211 return _make_fixed_width(fmt_values, self.justify)
1212
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format_strings(self)
1275 else:
1276 tpl = " {v}"
-> 1277 fmt_values.append(tpl.format(v=_format(v)))
1278
1279 return fmt_values
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format(x)
1246 return self.na_rep
1247 elif isinstance(x, PandasObject):
-> 1248 return str(x)
1249 else:
1250 # object dtype
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/core/frame.py in __repr__(self)
742 else:
743 width = None
--> 744 self.to_string(
745 buf=buf,
746 max_rows=max_rows,
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/core/frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, min_rows, max_cols, show_dimensions, decimal, line_width, max_colwidth, encoding)
881 line_width=line_width,
882 )
--> 883 return formatter.to_string(buf=buf, encoding=encoding)
884
885 # ----------------------------------------------------------------------
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in to_string(self, buf, encoding)
919 encoding: Optional[str] = None,
920 ) -> Optional[str]:
--> 921 return self.get_result(buf=buf, encoding=encoding)
922
923 def to_latex(
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in get_result(self, buf, encoding)
518 """
519 with self.get_buffer(buf, encoding=encoding) as f:
--> 520 self.write_result(buf=f)
521 if buf is None:
522 return f.getvalue()
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in write_result(self, buf)
832 else:
833
--> 834 strcols = self._to_str_columns()
835 if self.line_width is None: # no need to wrap around just print
836 # the whole frame
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _to_str_columns(self)
768 col_space.get(c, 0), *(self.adj.len(x) for x in cheader)
769 )
--> 770 fmt_values = self._format_col(i)
771 fmt_values = _make_fixed_width(
772 fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
... last 11 frames repeated, from the frame below ...
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format_col(self, i)
952 frame = self.tr_frame
953 formatter = self._get_formatter(i)
--> 954 return format_array(
955 frame.iloc[:, i]._values,
956 formatter,
RecursionError: maximum recursion depth exceeded in __instancecheck__
Использование функции frame_to_cols:
>>> frame_to_cols(data)
x df_x
0 0.887988 0.887988
1 0.707261 0.707261
2 0.095420 0.095420
3 0.353984 0.353984
4 0.815965 0.815965
5 0.815945 0.815945
6 0.448203 0.448203
7 0.919158 0.919158
8 0.081796 0.081796
9 0.291415 0.291415
10 0.352689 0.352689
11 0.110490 0.110490
12 0.573241 0.573241
13 0.748795 0.748795
14 0.540989 0.540989
15 0.104442 0.104442
16 0.805393 0.805393
17 0.749622 0.749622
18 0.031876 0.031876
19 0.462026 0.462026
20 0.998844 0.998844
21 0.010998 0.010998
22 0.661542 0.661542
23 0.643095 0.643095
24 0.649859 0.649859
25 0.543432 0.543432
26 0.256629 0.256629
27 0.297269 0.297269
28 0.702747 0.702747
29 0.495606 0.495606
30 0.848662 0.848662
31 0.269180 0.269180
32 0.827374 0.827374
33 0.822714 0.822714
34 0.083969 0.083969
35 0.012043 0.012043
36 0.545633 0.545633
37 0.045181 0.045181
38 0.622139 0.622139
39 0.863135 0.863135
40 0.858195 0.858195
41 0.603239 0.603239
42 0.099584 0.099584
43 0.034516 0.034516
44 0.830687 0.830687
45 0.214127 0.214127
46 0.440444 0.440444
47 0.915540 0.915540
48 0.324738 0.324738
49 0.997392 0.997392