#python #pandas #sharepoint #jupyter-notebook
Вопрос:
Всем привет, я пытаюсь подключить jupyter к своему бизнесу sharepoint/onedrive, чтобы экспортировать csv (файл большого размера) и проанализировать данные в нем . Я перепробовал все методы в Интернете и stackoverflow, но в лучшем случае у меня 404 ошибки запрещены. Вот один из методов с ошибкой. кто — нибудь может мне помочь ? большое спасибо.
!{sys.executable} -m pip install Office365-REST-Python-Client
#import all the libraries
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
import io
import pandas as pd
#target url taken from sharepoint and credentials
url = 'shared url of the csv file'
username = 'name-id@group-name.onmicrosoft.com'
password = '01password'
ctx_auth = AuthenticationContext(url)
if ctx_auth.acquire_token_for_user(username, password):
ctx = ClientContext(url, ctx_auth)
web = ctx.web
ctx.load(web)
ctx.execute_query()
print("Authentication successful")
response = File.open_binary(ctx, url)
#save data to BytesIO stream
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0) #set file object to start
#read excel file and each sheet into pandas dataframe
df = pd.read_excel(bytes_file_obj)```
then i have this error :
``` ---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-8ce9c72ed35a> in <module>
30
31 #read excel file and each sheet into pandas dataframe
---> 32 df = pd.read_excel(bytes_file_obj)
~anaconda3libsite-packagespandasutil_decorators.py in wrapper(*args, **kwargs)
297 )
298 warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
--> 299 return func(*args, **kwargs)
300
301 return wrapper
~anaconda3libsite-packagespandasioexcel_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols, storage_options)
334 if not isinstance(io, ExcelFile):
335 should_close = True
--> 336 io = ExcelFile(io, storage_options=storage_options, engine=engine)
337 elif engine and engine != io.engine:
338 raise ValueError(
~anaconda3libsite-packagespandasioexcel_base.py in __init__(self, path_or_buffer, engine, storage_options)
1069 ext = "xls"
1070 else:
-> 1071 ext = inspect_excel_format(
1072 content=path_or_buffer, storage_options=storage_options
1073 )
~anaconda3libsite-packagespandasioexcel_base.py in inspect_excel_format(path, content, storage_options)
963 return "xls"
964 elif not peek.startswith(ZIP_SIGNATURE):
--> 965 raise ValueError("File is not a recognized excel file")
966
967 # ZipFile typing is overly-strict
ValueError: File is not a recognized excel file```