Ошибка рекурсии: превышена максимальная глубина рекурсии при доступе к фрейму данных

#python #pandas #dataframe #pickle

#python #pandas #датафрейм #pickle

Вопрос:

Я пытаюсь загрузить файл pickle в фрейм данных. Я пробовал задавать через setrecursionlimit значения от 1500 до 5000, но ошибка по-прежнему возникает.

Есть ли какой-либо другой способ получить доступ к файлу pickle и поместить его в фрейм данных?

 import pandas as pd
import numpy as np
import sys
# Raise the interpreter's recursion limit; the question reports that values
# from 1500 to 5000 did not make the error go away.
sys.setrecursionlimit(5000)

# Load the pickled object from disk; compression=None reads the file as-is.
df = pd.read_pickle("data.pkl",compression=None)

# Displaying the first rows is the call that raises RecursionError.
df.head()
  

изображение

Весь журнал:

 ---------------------------------------------------------------------------
RecursionError                            Traceback (most recent call last)
<ipython-input-4-c42a15b2c7cf> in <module>
----> 1 df.head()

/opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in head(self, n)
   4787         """
   4788 
-> 4789         return self.iloc[:n]
   4790 
   4791     def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries:

/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in __getitem__(self, key)
   1766 
   1767             maybe_callable = com.apply_if_callable(key, self.obj)
-> 1768             return self._getitem_axis(maybe_callable, axis=axis)
   1769 
   1770     def _is_scalar_access(self, key: Tuple):

/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   2116     def _getitem_axis(self, key, axis: int):
   2117         if isinstance(key, slice):
-> 2118             return self._get_slice_axis(key, axis=axis)
   2119 
   2120         if isinstance(key, list):

/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in _get_slice_axis(self, slice_obj, axis)
   1747             return obj.copy(deep=False)
   1748 
-> 1749         indexer = self._convert_slice_indexer(slice_obj, axis)
   1750         return self._slice(indexer, axis=axis, kind="iloc")
   1751 

/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in _convert_slice_indexer(self, key, axis)
    743     def _convert_slice_indexer(self, key: slice, axis: int):
    744         # if we are accessing via lowered dim, use the last dim
--> 745         ax = self.obj._get_axis(min(axis, self.ndim - 1))
    746         return ax._convert_slice_indexer(key, kind=self.name)
    747 

pandas/_libs/indexing.pyx in pandas._libs.indexing._NDFrameIndexerBase.ndim.__get__()

/opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in __getattr__(self, name)
   5270             return object.__getattribute__(self, name)
   5271         else:
-> 5272             if self._info_axis._can_hold_identifiers_and_holds_name(name):
   5273                 return self[name]
   5274             return object.__getattribute__(self, name)

... last 1 frames repeated, from the frame below ...

/opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in __getattr__(self, name)
   5270             return object.__getattribute__(self, name)
   5271         else:
-> 5272             if self._info_axis._can_hold_identifiers_and_holds_name(name):
   5273                 return self[name]
   5274             return object.__getattribute__(self, name)

RecursionError: maximum recursion depth exceeded
  

Комментарии:

1. Что находится в файле pickle? Как он был создан? Вы сами его создали? Я думаю, что будет очень сложно ответить на этот вопрос без дополнительной информации о том, что находится в этом pickle-файле.

Ответ №1:

Вы пытаетесь прочитать фрейм данных со столбцами, которые содержат другие фреймы данных?

Если это так, и все строки содержат одну и ту же копию фрейма данных, вы можете рассмотреть возможность преобразования столбцов внутреннего фрейма в фактические столбцы во внешнем фрейме.

Вы можете найти столбцы, содержащие фреймы, следующим образом:

 [col for col in df.select_dtypes(object).columns if isinstance(df[col].iloc[0], pd.DataFrame)]  # object-dtype columns whose first cell is a DataFrame
  

Чтобы преобразовать столбцы внутреннего фрейма в фактические столбцы, вы можете попробовать что-то вроде этого:

 from typing import List
import pandas as pd


def find_frame_cols(df: pd.DataFrame) -> List[str]:
    """Find columns in a DataFrame that hold DataFrames.

    Only object-dtype columns are considered, and only the value in the
    first row of each such column is inspected: a column is reported when
    that first value is a ``pd.DataFrame``.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to search for columns having DataFrames.

    Returns
    -------
    List[str]
        List of column names holding DataFrames. Empty when ``df`` has no
        rows, because there is no first value to inspect.
    """
    # Without rows, ``.iloc[0]`` below would raise IndexError.
    if len(df.index) == 0:
        return []
    return [
        col for col in df.select_dtypes(object).columns
        if isinstance(df[col].iloc[0], pd.DataFrame)
    ]


def frame_to_cols(df: pd.DataFrame, drop_after: bool = True) -> pd.DataFrame:
    """Convert columns holding pandas DataFrame objects to columns.

    For every column reported by ``find_frame_cols``, the DataFrame stored in
    that column's first row is taken as the inner frame. Its columns (except
    those that themselves hold DataFrames) are added to the result under the
    name ``"<outer column>_<inner column>"``. Values are assigned with
    pandas' usual index alignment, so the inner frame's index should match
    the outer frame's index.

    The input frame is not modified; a copy is returned.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to convert.
    drop_after : bool, default=True
        Whether to drop the columns with DataFrame objects, after conversion.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns from inner frame.
    """
    df_cols = find_frame_cols(df)
    # Work on a copy so new columns are never added to the caller's frame
    # as a side effect (previously the input was modified in place).
    result = df.copy()
    for col in df_cols:
        inner_frame = result[col].iloc[0].infer_objects()
        # Leave out inner columns that hold DataFrames themselves; note that
        # Index.difference returns the remaining labels sorted.
        kept_cols = inner_frame.columns.difference(find_frame_cols(inner_frame))
        if kept_cols.empty:
            # Nothing to flatten for this column.
            continue
        flattened = inner_frame[kept_cols].rename(
            columns={inner_col: f"{col}_{inner_col}" for inner_col in kept_cols}
        )
        result[flattened.columns] = flattened
    if drop_after:
        result = result.drop(columns=df_cols, errors="ignore")
    return result


# Flatten the DataFrame-valued columns of `df`; the result is bound to `clean_df`.
clean_df = frame_to_cols(df)
  

Пример:

 import numpy as np
import pandas as pd

# One column 'x' filled with 50 random floats.
data = pd.DataFrame(np.random.random(50), columns=['x'])
# Put a reference to the frame itself into every row of a new 'df' column,
# which makes the frame self-referential.
data['df'] = [data] * data.shape[0]
  

Попытка вывести `data` приводит к следующему:

 >>> data

---------------------------------------------------------------------------
RecursionError                            Traceback (most recent call last)
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
    700                 type_pprinters=self.type_printers,
    701                 deferred_pprinters=self.deferred_printers)
--> 702             printer.pretty(obj)
    703             printer.flush()
    704             return stream.getvalue()

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/IPython/lib/pretty.py in pretty(self, obj)
    392                         if cls is not object 
    393                                 and callable(cls.__dict__.get('__repr__')):
--> 394                             return _repr_pprint(obj, self, cycle)
    395 
    396             return _default_pprint(obj, self, cycle)

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
    698     """A pprint that just redirects to the normal repr function."""
    699     # Find newlines and replace them with p.break_()
--> 700     output = repr(obj)
    701     lines = output.splitlines()
    702     with p.group():

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/core/frame.py in __repr__(self)
    742         else:
    743             width = None
--> 744         self.to_string(
    745             buf=buf,
    746             max_rows=max_rows,

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/core/frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, min_rows, max_cols, show_dimensions, decimal, line_width, max_colwidth, encoding)
    881                 line_width=line_width,
    882             )
--> 883             return formatter.to_string(buf=buf, encoding=encoding)
    884 
    885     # ----------------------------------------------------------------------

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in to_string(self, buf, encoding)
    919         encoding: Optional[str] = None,
    920     ) -> Optional[str]:
--> 921         return self.get_result(buf=buf, encoding=encoding)
    922 
    923     def to_latex(

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in get_result(self, buf, encoding)
    518         """
    519         with self.get_buffer(buf, encoding=encoding) as f:
--> 520             self.write_result(buf=f)
    521             if buf is None:
    522                 return f.getvalue()

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in write_result(self, buf)
    832         else:
    833 
--> 834             strcols = self._to_str_columns()
    835             if self.line_width is None:  # no need to wrap around just print
    836                 # the whole frame

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _to_str_columns(self)
    768                     col_space.get(c, 0), *(self.adj.len(x) for x in cheader)
    769                 )
--> 770                 fmt_values = self._format_col(i)
    771                 fmt_values = _make_fixed_width(
    772                     fmt_values, self.justify, minimum=header_colwidth, adj=self.adj

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format_col(self, i)
    952         frame = self.tr_frame
    953         formatter = self._get_formatter(i)
--> 954         return format_array(
    955             frame.iloc[:, i]._values,
    956             formatter,

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in format_array(values, formatter, float_format, na_rep, digits, space, justify, decimal, leading_space, quoting)
   1177     )
   1178 
-> 1179     return fmt_obj.get_result()
   1180 
   1181 

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in get_result(self)
   1208 
   1209     def get_result(self) -> List[str]:
-> 1210         fmt_values = self._format_strings()
   1211         return _make_fixed_width(fmt_values, self.justify)
   1212 

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format_strings(self)
   1275                 else:
   1276                     tpl = " {v}"
-> 1277                 fmt_values.append(tpl.format(v=_format(v)))
   1278 
   1279         return fmt_values

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format(x)
   1246                 return self.na_rep
   1247             elif isinstance(x, PandasObject):
-> 1248                 return str(x)
   1249             else:
   1250                 # object dtype

... last 11 frames repeated, from the frame below ...

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/core/frame.py in __repr__(self)
    742         else:
    743             width = None
--> 744         self.to_string(
    745             buf=buf,
    746             max_rows=max_rows,

RecursionError: maximum recursion depth exceeded in __instancecheck__
---------------------------------------------------------------------------
RecursionError                            Traceback (most recent call last)
~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
    343             method = get_real_method(obj, self.print_method)
    344             if method is not None:
--> 345                 return method()
    346             return None
    347         else:

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/core/frame.py in _repr_html_(self)
    796                 render_links=False,
    797             )
--> 798             return formatter.to_html(notebook=True)
    799         else:
    800             return None

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in to_html(self, buf, encoding, classes, notebook, border)
    986 
    987         Klass = NotebookFormatter if notebook else HTMLFormatter
--> 988         return Klass(self, classes=classes, border=border).get_result(
    989             buf=buf, encoding=encoding
    990         )

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in get_result(self, buf, encoding)
    518         """
    519         with self.get_buffer(buf, encoding=encoding) as f:
--> 520             self.write_result(buf=f)
    521             if buf is None:
    522                 return f.getvalue()

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in write_result(self, buf)
    201 
    202     def write_result(self, buf: IO[str]) -> None:
--> 203         buffer_put_lines(buf, self.render())
    204 
    205     def _write_table(self, indent: int = 0) -> None:

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in render(self)
    605         self.write("<div>")
    606         self.write_style()
--> 607         super().render()
    608         self.write("</div>")
    609         return self.elements

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in render(self)
    190 
    191     def render(self) -> List[str]:
--> 192         self._write_table()
    193 
    194         if self.should_show_dimensions:

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in _write_table(self, indent)
    231             self._write_header(indent   self.indent_delta)
    232 
--> 233         self._write_body(indent   self.indent_delta)
    234 
    235         self.write("</table>", indent)

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in _write_body(self, indent)
    377     def _write_body(self, indent: int) -> None:
    378         self.write("<tbody>", indent)
--> 379         fmt_values = self._get_formatted_values()
    380 
    381         # write values

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in _get_formatted_values(self)
    569 
    570     def _get_formatted_values(self) -> Dict[int, List[str]]:
--> 571         return {i: self.fmt._format_col(i) for i in range(self.ncols)}
    572 
    573     def _get_columns_formatted_values(self) -> List[str]:

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/html.py in <dictcomp>(.0)
    569 
    570     def _get_formatted_values(self) -> Dict[int, List[str]]:
--> 571         return {i: self.fmt._format_col(i) for i in range(self.ncols)}
    572 
    573     def _get_columns_formatted_values(self) -> List[str]:

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format_col(self, i)
    952         frame = self.tr_frame
    953         formatter = self._get_formatter(i)
--> 954         return format_array(
    955             frame.iloc[:, i]._values,
    956             formatter,

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in format_array(values, formatter, float_format, na_rep, digits, space, justify, decimal, leading_space, quoting)
   1177     )
   1178 
-> 1179     return fmt_obj.get_result()
   1180 
   1181 

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in get_result(self)
   1208 
   1209     def get_result(self) -> List[str]:
-> 1210         fmt_values = self._format_strings()
   1211         return _make_fixed_width(fmt_values, self.justify)
   1212 

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format_strings(self)
   1275                 else:
   1276                     tpl = " {v}"
-> 1277                 fmt_values.append(tpl.format(v=_format(v)))
   1278 
   1279         return fmt_values

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format(x)
   1246                 return self.na_rep
   1247             elif isinstance(x, PandasObject):
-> 1248                 return str(x)
   1249             else:
   1250                 # object dtype

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/core/frame.py in __repr__(self)
    742         else:
    743             width = None
--> 744         self.to_string(
    745             buf=buf,
    746             max_rows=max_rows,

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/core/frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, min_rows, max_cols, show_dimensions, decimal, line_width, max_colwidth, encoding)
    881                 line_width=line_width,
    882             )
--> 883             return formatter.to_string(buf=buf, encoding=encoding)
    884 
    885     # ----------------------------------------------------------------------

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in to_string(self, buf, encoding)
    919         encoding: Optional[str] = None,
    920     ) -> Optional[str]:
--> 921         return self.get_result(buf=buf, encoding=encoding)
    922 
    923     def to_latex(

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in get_result(self, buf, encoding)
    518         """
    519         with self.get_buffer(buf, encoding=encoding) as f:
--> 520             self.write_result(buf=f)
    521             if buf is None:
    522                 return f.getvalue()

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in write_result(self, buf)
    832         else:
    833 
--> 834             strcols = self._to_str_columns()
    835             if self.line_width is None:  # no need to wrap around just print
    836                 # the whole frame

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _to_str_columns(self)
    768                     col_space.get(c, 0), *(self.adj.len(x) for x in cheader)
    769                 )
--> 770                 fmt_values = self._format_col(i)
    771                 fmt_values = _make_fixed_width(
    772                     fmt_values, self.justify, minimum=header_colwidth, adj=self.adj

... last 11 frames repeated, from the frame below ...

~/Library/Caches/pypoetry/virtualenvs/sandbox/lib/python3.8/site-packages/pandas/io/formats/format.py in _format_col(self, i)
    952         frame = self.tr_frame
    953         formatter = self._get_formatter(i)
--> 954         return format_array(
    955             frame.iloc[:, i]._values,
    956             formatter,

RecursionError: maximum recursion depth exceeded in __instancecheck__
  

Использование функции frame_to_cols:

 >>> frame_to_cols(data)

    x   df_x
0   0.887988    0.887988
1   0.707261    0.707261
2   0.095420    0.095420
3   0.353984    0.353984
4   0.815965    0.815965
5   0.815945    0.815945
6   0.448203    0.448203
7   0.919158    0.919158
8   0.081796    0.081796
9   0.291415    0.291415
10  0.352689    0.352689
11  0.110490    0.110490
12  0.573241    0.573241
13  0.748795    0.748795
14  0.540989    0.540989
15  0.104442    0.104442
16  0.805393    0.805393
17  0.749622    0.749622
18  0.031876    0.031876
19  0.462026    0.462026
20  0.998844    0.998844
21  0.010998    0.010998
22  0.661542    0.661542
23  0.643095    0.643095
24  0.649859    0.649859
25  0.543432    0.543432
26  0.256629    0.256629
27  0.297269    0.297269
28  0.702747    0.702747
29  0.495606    0.495606
30  0.848662    0.848662
31  0.269180    0.269180
32  0.827374    0.827374
33  0.822714    0.822714
34  0.083969    0.083969
35  0.012043    0.012043
36  0.545633    0.545633
37  0.045181    0.045181
38  0.622139    0.622139
39  0.863135    0.863135
40  0.858195    0.858195
41  0.603239    0.603239
42  0.099584    0.099584
43  0.034516    0.034516
44  0.830687    0.830687
45  0.214127    0.214127
46  0.440444    0.440444
47  0.915540    0.915540
48  0.324738    0.324738
49  0.997392    0.997392