#python #amazon-s3 #boto3 #pyarrow #awswrangler
Вопрос:
Я пытаюсь получить доступ к таблице в корзине AWS. Когда я пытаюсь получить доступ к нему с помощью awswrangler.read_parquet
функции, я получаю сообщение об ошибке, в котором говорится, что я не могу получить доступ к этому файлу, потому что я не могу создавать новые потоки. Обычно я могу получить доступ к этому файлу после ожидания более 30 минут, но это не говорит мне, как решить проблему. Вот более подробная информация о команде:
aws_df = wr.s3.read_parquet(path=self._filepath, **self._load_args)
File "/home/ec2-user/anaconda3/lib/python3.7/site-packages/awswrangler/s3/_read_parquet.py", line 721, in read_parquet
read_func=_read_parquet, paths=paths, version_ids=versions, use_threads=use_threads, kwargs=args
File "/home/ec2-user/anaconda3/lib/python3.7/site-packages/awswrangler/s3/_read.py", line 145, in _read_dfs_from_multiple_paths
return list(df for df in executor.map(partial_read_func, paths, versions))
File "/home/ec2-user/anaconda3/lib/python3.7/site-packages/awswrangler/s3/_read.py", line 145, in <genexpr>
return list(df for df in executor.map(partial_read_func, paths, versions))
File "/home/ec2-user/anaconda3/lib/python3.7/concurrent/futures/_base.py", line 586, in result_iterator
yield fs.pop().result()
File "/home/ec2-user/anaconda3/lib/python3.7/concurrent/futures/_base.py", line 432, in result
return self.__get_result()
File "/home/ec2-user/anaconda3/lib/python3.7/concurrent/futures/_base.py", line 384, in __get_result
raise self._exception
File "/home/ec2-user/anaconda3/lib/python3.7/concurrent/futures/thread.py", line 57, in run
result = self.fn(*self.args, **self.kwargs)
File "/home/ec2-user/anaconda3/lib/python3.7/site-packages/awswrangler/s3/_read_parquet.py", line 495, in _read_parquet
version_id=version_id,
File "/home/ec2-user/anaconda3/lib/python3.7/site-packages/awswrangler/s3/_read_parquet.py", line 440, in _read_parquet_file
source=f, read_dictionary=categories
File "/home/ec2-user/anaconda3/lib/python3.7/site-packages/awswrangler/s3/_read_parquet.py", line 40, in _pyarrow_parquet_file_wrapper
return pyarrow.parquet.ParquetFile(source=source, read_dictionary=read_dictionary)
File "/home/ec2-user/anaconda3/lib/python3.7/site-packages/pyarrow/parquet.py", line 201, in __init__
read_dictionary=read_dictionary, metadata=metadata)
File "pyarrow/_parquet.pyx", line 1021, in pyarrow._parquet.ParquetReader.open
File "/home/ec2-user/anaconda3/lib/python3.7/site-packages/awswrangler/s3/_fs.py", line 569, in read
self._fetch(self._loc, self._loc length)
File "/home/ec2-user/anaconda3/lib/python3.7/site-packages/awswrangler/s3/_fs.py", line 376, in _fetch
self._cache = self._fetch_range_proxy(self._start, self._end)
File "/home/ec2-user/anaconda3/lib/python3.7/site-packages/awswrangler/s3/_fs.py", line 359, in _fetch_range_proxy
itertools.repeat(self._version_id),
File "/home/ec2-user/anaconda3/lib/python3.7/concurrent/futures/_base.py", line 575, in map
fs = [self.submit(fn, *args) for args in zip(*iterables)]
File "/home/ec2-user/anaconda3/lib/python3.7/concurrent/futures/_base.py", line 575, in <listcomp>
fs = [self.submit(fn, *args) for args in zip(*iterables)]
File "/home/ec2-user/anaconda3/lib/python3.7/concurrent/futures/thread.py", line 160, in submit
self._adjust_thread_count()
File "/home/ec2-user/anaconda3/lib/python3.7/concurrent/futures/thread.py", line 181, in _adjust_thread_count
t.start()
File "/home/ec2-user/anaconda3/lib/python3.7/threading.py", line 847, in start
_start_new_thread(self._bootstrap, ())
RuntimeError: can't start new thread
Ответ №1:
Я смог решить эту проблему, добавив режим сна между моими командами.
import time
time.sleep(10)