#python #web-scraping #beautifulsoup #python-requests #scrapinghub
#python #веб-скрейпинг #beautifulsoup #python-requests #scrapinghub
Вопрос:
Итак, я использую приведённый ниже код, чтобы извлечь URL-адреса изображений кредитных карт со страниц, ссылки на которые хранятся в переменной explore_more_url.
from urllib.request import urlopen
from bs4 import BeautifulSoup
import json, requests, re
from selenium import webdriver
# Start Chrome through Selenium. The path is a raw string so the backslashes
# of the Windows path stay literal (a plain "C:\Users..." literal is a
# SyntaxError because "\U" starts a unicode escape).
driver = webdriver.Chrome(executable_path=r"C:\Users\Hari\Downloads\chromedriver.exe")

# Collected absolute image URLs, one per successfully parsed page.
img_url = []

# Product pages to visit.
explore_more_url = [
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-ace-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-aura-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/privilege-easy-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/reserve-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-freecharge-plus-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/indianoil-axis-bank-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-magnus-card/feature-benefits',
    'https://www.axisbank.com/retail/cards/credit-card/flipkart-axisbank-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-freecharge-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/my-zone-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/neo-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-vistara-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-vistara-signature-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-vistara-infinite-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/privilege-credit-card-with-unlimited-travel-benefits-account',
    'https://www.axisbank.com/retail/cards/credit-card/miles-more-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-select-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/pride-platinum-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/pride-signature-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/my-zone-easy-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/insta-easy-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/signature-credit-card-with-lifestyle-benefits',
    'https://www.axisbank.com/retail/cards/credit-card/platinum-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/titanium-smart-traveler-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/my-wings-credit-card/features-benefits',
]

for x in explore_more_url:
    # Render the page in the browser and parse the resulting HTML.
    driver.get(x)
    soup_1 = BeautifulSoup(driver.page_source, 'lxml')
    # NOTE: find() returns None when a page has no <img> with this alt text;
    # the chained .get('src') then raises
    # AttributeError: 'NoneType' object has no attribute 'get'.
    img_url.append("https://www.axisbank.com" + soup_1.find('img', alt="Fast Forward Banner").get('src'))

print(img_url)
Вывод :
Traceback (most recent call last):
File "C:\Users\Hari\PycharmProjects\Card_Prj\axis.py", line 82, in <module>
img_url.append("https://www.axisbank.com" + soup_1.find('img', alt="Fast Forward Banner").get('src'))
AttributeError: 'NoneType' object has no attribute 'get'
Изображения выглядят примерно так в каждой ссылке:
Какой подходящий код я мог бы использовать, чтобы получить именно то, что я ожидаю?
Ответ №1:
Одним из способов получения изображения может быть следующий:
import requests
from bs4 import BeautifulSoup
# Browser-like User-Agent header sent with the page request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
}

# Fetch the product page; the timeout keeps a stalled connection from
# blocking the script forever.
page = requests.get(
    "https://www.axisbank.com/retail/cards/credit-card/axis-bank-ace-credit-card",
    headers=headers,
    timeout=30,
).text

# Site-relative path of the first <img> inside the .bannerWrapper element.
img_src_ = BeautifulSoup(page, "html.parser").select_one('.bannerWrapper img')["src"]

# Download the image BEFORE opening the output file, and fail loudly on an
# HTTP error, so a failed request never leaves an empty or bogus .jpg behind.
response = requests.get(f"https://www.axisbank.com{img_src_}", timeout=30)
response.raise_for_status()

# Save under the last path component of the image URL, in the current directory.
with open(img_src_.rsplit("/", 1)[-1], "wb") as image:
    image.write(response.content)
Вывод: файл .jpg в локальном каталоге скрипта:
ace-product-landing-web-version-1920x360.jpg
ПРАВКА: чтобы получить только URL-адреса изображений (значения атрибута src), попробуйте следующее:
import requests
from bs4 import BeautifulSoup
# Browser-like User-Agent header sent with every page request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
}

# Product pages to scan for a banner image.
explore_more_url = [
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-ace-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-aura-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/privilege-easy-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/reserve-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-freecharge-plus-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/indianoil-axis-bank-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-magnus-card/feature-benefits',
    'https://www.axisbank.com/retail/cards/credit-card/flipkart-axisbank-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-freecharge-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/my-zone-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/neo-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-vistara-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-vistara-signature-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-vistara-infinite-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/privilege-credit-card-with-unlimited-travel-benefits-account',
    'https://www.axisbank.com/retail/cards/credit-card/miles-more-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/axis-bank-select-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/pride-platinum-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/pride-signature-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/my-zone-easy-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/insta-easy-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/signature-credit-card-with-lifestyle-benefits',
    'https://www.axisbank.com/retail/cards/credit-card/platinum-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/titanium-smart-traveler-credit-card',
    'https://www.axisbank.com/retail/cards/credit-card/my-wings-credit-card/features-benefits',
]

# Absolute banner-image URLs, in the order the pages were visited.
img_urls = []

for url in explore_more_url:
    # The timeout keeps one stalled page from blocking the whole run.
    page = requests.get(url, headers=headers, timeout=30).text

    # select_one() returns None when the page has no .bannerWrapper image.
    banner = BeautifulSoup(page, "html.parser").select_one('.bannerWrapper img')
    img_src_ = banner.get("src") if banner is not None else None
    if img_src_ is None:
        # No banner on this page (or the <img> carries no src): skip it.
        continue

    print(f"Finding image source url for {url}")
    img_urls.append(f"https://www.axisbank.com{img_src_}")

print(img_urls)
Вывод:
['https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/ace-product-landing-web-version-1920x360.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/health-and-wellness-product-page-1920x360_v1.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/freecharge-product-landing-page-desktop-banner-revised.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/indian-oil-banner-desktop.jpg', 'https://www.axisbank.com/img/magnuscard/apply-now.png', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/flipkart-abcc-desk.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/product-landing-page-desktop-banner.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/myzone-easy-1920-360-desktop-banner.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/neo-credit-card-1920-360-desktop-banner.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/vistara-1920-360-desktop-banner.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/vistara-1920-360-desktop-banner.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/vistara-1920-360-desktop-banner.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/privilege-credit-card.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/miles---more-credit-card.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/desktop-select-credit-card.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/pride-platinum-1920-360-desktop-banner.jpg', 
'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/pride-platinum-1920-360-desktop-banner.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/myzone-easy-1920-360-desktop-banner.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/insta-easy-credit-card.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/signature-credit-card-with.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/platinum-credit-card.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/signature-credit-card-with.jpg', 'https://www.axisbank.com/images/default-source/revamp_new/cards/credit-cards/desktop/my-wings-credit-card.jpg']
Комментарии:
1. @baduker Я высоко ценю написанный вами код, спасибо за него, но сейчас мне нужен просто список URL-адресов изображений всех карт.
2. @baduker Не хватает слов, чтобы выразить благодарность за вашу правку, но есть небольшая проблема: я просмотрел ссылки в выводе — некоторые из них ведут на изображение карты, а некоторые — не на изображение карты, а на какую-то постороннюю картинку. Буду рад, если вы сможете решить эту проблему.
3. @baduker Да, но на некоторых страницах нет баннеров, зато изображения карт есть где-то внизу страницы.