Download multiple files from an array and place them in the desired directory with Python3

#python #python-3.x #web-scraping #beautifulsoup #python-requests

Question:

    # Import desired libraries -- make HTTP requests / query DOM elements
    import requests
    from bs4 import BeautifulSoup as bs
    import zipfile

    # Make a request to the NGA site; the response is stored in r (DOM)
    r = requests.get('https://earth-info.nga.mil/index.php?dir=coordsys&action=gars-20x20-dloads')

    # Parse the data using the BeautifulSoup library and the default HTML parser
    soup = bs(r.content, 'html.parser')
    # Output is the pure raw HTML DOM
    # print(soup)

    # Scan the DOM tree and collect the desired href zip files into an array for later downloading
    files = ['https://earth-info.nga.mil/' + i['href'] for i in soup.select('area')]
    # print(files)

    # Download a single file from the array
    # firstUrl = files[0]

    # Download multiple files from the array
    for file in files:
        r = requests.get(file, stream=True)
        save_path = '/Users/iga0779/Downloads/%s.zip' % r
        filex = open(save_path, 'wb')
        filex.write(downloadedfile.content)
        filex.close()

At the moment I'm a bit stuck on the next steps here. I chose the Downloads directory as the place I'd like to put the files, but I'm fairly new to this and not sure how to write to that directory correctly.
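The immediate issues in the final loop above are that save_path interpolates the Response object r into the filename, and that downloadedfile is never defined (the response lives in r). A minimal corrected sketch of just that loop, assuming each archive should be named after the last path segment of its URL:

    for file in files:
        r = requests.get(file)
        r.raise_for_status()
        # Derive the zip name from the URL instead of the Response object
        save_path = '/Users/iga0779/Downloads/%s.zip' % file.split('/')[-1]
        with open(save_path, 'wb') as filex:
            filex.write(r.content)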

Answer #1:

You can also stream your files and write them in chunks with open():

    for file in files:
        with requests.get(file, stream=True) as r:
            r.raise_for_status()
            with open(f'tmpZip/{file.split("/")[-1]}.zip', 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
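One caveat: open() raises FileNotFoundError if the tmpZip directory does not exist yet, so the snippet assumes it has already been created. A small sketch that guarantees this beforehand, using pathlib:

    from pathlib import Path

    # Create the target directory once, before the download loop;
    # exist_ok=True makes this a no-op if it already exists.
    Path('tmpZip').mkdir(parents=True, exist_ok=True)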

Example

    import requests
    from bs4 import BeautifulSoup as bs
    import zipfile

    # Make a request to the NGA site; the response is stored in r (DOM)
    r = requests.get('https://earth-info.nga.mil/index.php?dir=coordsys&action=gars-20x20-dloads')

    # Parse the data using the BeautifulSoup library and the default HTML parser
    soup = bs(r.content, 'html.parser')
    # Output is the pure raw HTML DOM
    # print(soup)

    # Scan the DOM tree and collect the desired href zip files into an array for later downloading
    files = ['https://earth-info.nga.mil/' + i['href'] for i in soup.select('area')]
    # print(files)

    def download_file(file):
        with requests.get(file, stream=True) as r:
            r.raise_for_status()
            with open(f'tmpZip/{file.split("/")[-1]}.zip', 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        return f'File: {file.split("/")[-1]}.zip -> downloaded'

    # files sliced to the first three urls from the result; delete [:3] to get all
    for file in files[:3]:
        print(download_file(file))

Output

    File: 180W60N.zip -> downloaded
    File: 180W40N.zip -> downloaded
    File: 180W20N.zip -> downloaded
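Since download_file() is self-contained, it can also be handed to a thread pool to fetch several archives concurrently. This is a sketch rather than part of the original answer; the worker count of 4 is an arbitrary choice:

    from concurrent.futures import ThreadPoolExecutor

    # map() keeps the input order and re-raises any exception
    # from download_file in the main thread.
    with ThreadPoolExecutor(max_workers=4) as executor:
        for result in executor.map(download_file, files[:3]):
            print(result)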

Answer #2:

You can also try this:

    # Import desired libraries -- make HTTP requests / query DOM elements
    import requests
    from bs4 import BeautifulSoup as bs
    import zipfile
    import os
    from io import BytesIO

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
        "Accept-Encoding": "*",
        "Connection": "keep-alive"
    }

    # Make a request to the NGA site; the response is stored in r (DOM)
    r = requests.get('https://earth-info.nga.mil/index.php?dir=coordsys&action=gars-20x20-dloads')

    # Parse the data using the BeautifulSoup library and the default HTML parser
    soup = bs(r.content, 'html.parser')
    # Output is the pure raw HTML DOM
    # print(soup)

    # Scan the DOM tree and collect the desired href zip files into an array for later downloading
    files = ['https://earth-info.nga.mil/' + i['href'] for i in soup.select('area')]
    # print(files)

    mydirname = r'C:\Users\User\Documents\Downloads'

    for url in files:
        r = requests.get(url, headers=headers, stream=True)
        if r.status_code == 200:
            # Name the destination folder after the last path segment of the URL
            newfoldername = r.url.split('/')[-1]
            path_ = os.path.join(mydirname, newfoldername)
            if not os.path.exists(path_):
                os.mkdir(path_)
            # Unpack the zip archive straight from memory into that folder
            zipfile.ZipFile(BytesIO(r.content)).extractall(path_)

    print('Finished...')
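A caveat with this approach: r.content goes to ZipFile unverified, so a non-zip response (an error page, for instance) raises zipfile.BadZipFile and stops the loop. A hedged variant of the extraction step that skips such responses instead of crashing (assumes r and path_ are set as in the loop above):

    try:
        # Unpack the archive from memory; fails if the body is not a zip
        zipfile.ZipFile(BytesIO(r.content)).extractall(path_)
    except zipfile.BadZipFile:
        print(f'Skipping {r.url}: response is not a valid zip archive')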