как экспортировать csv-файл в jupyter из onedrive sharepoint?

#python #pandas #sharepoint #jupyter-notebook

Вопрос:

Всем привет, я пытаюсь подключить jupyter к своему бизнесу sharepoint/onedrive, чтобы экспортировать csv (файл большого размера) и проанализировать данные в нем . Я перепробовал все методы в Интернете и stackoverflow, но в лучшем случае у меня 404 ошибки запрещены. Вот один из методов с ошибкой. кто — нибудь может мне помочь ? большое спасибо.

 !{sys.executable} -m pip install Office365-REST-Python-Client

#import all the libraries
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File 
import io
import pandas as pd

#target url taken from sharepoint and credentials
url = 'shared url of the csv file'
username = 'name-id@group-name.onmicrosoft.com'
password = '01password'

ctx_auth = AuthenticationContext(url)
if ctx_auth.acquire_token_for_user(username, password):
  ctx = ClientContext(url, ctx_auth)
  web = ctx.web
  ctx.load(web)
  ctx.execute_query()
  print("Authentication successful")

response = File.open_binary(ctx, url)

#save data to BytesIO stream
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0) #set file object to start

#read excel file and each sheet into pandas dataframe 
df = pd.read_excel(bytes_file_obj)``` 

then i have this error :
``` ---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-4-8ce9c72ed35a> in <module>
     30 
     31 #read excel file and each sheet into pandas dataframe
---> 32 df = pd.read_excel(bytes_file_obj)

~anaconda3libsite-packagespandasutil_decorators.py in wrapper(*args, **kwargs)
    297                 )
    298                 warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
--> 299             return func(*args, **kwargs)
    300 
    301         return wrapper

~anaconda3libsite-packagespandasioexcel_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols, storage_options)
    334     if not isinstance(io, ExcelFile):
    335         should_close = True
--> 336         io = ExcelFile(io, storage_options=storage_options, engine=engine)
    337     elif engine and engine != io.engine:
    338         raise ValueError(

~anaconda3libsite-packagespandasioexcel_base.py in __init__(self, path_or_buffer, engine, storage_options)
   1069                 ext = "xls"
   1070             else:
-> 1071                 ext = inspect_excel_format(
   1072                     content=path_or_buffer, storage_options=storage_options
   1073                 )

~anaconda3libsite-packagespandasioexcel_base.py in inspect_excel_format(path, content, storage_options)
    963             return "xls"
    964         elif not peek.startswith(ZIP_SIGNATURE):
--> 965             raise ValueError("File is not a recognized excel file")
    966 
    967         # ZipFile typing is overly-strict

ValueError: File is not a recognized excel file```