#python
Вопрос:
from urllib.parse import urljoin

import pandas as pd
import requests as rqs
from bs4 import BeautifulSoup as sp
from selenium import webdriver
# HTTP headers sent with every product-page request made through `requests`.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
    'Accept-Language': 'en-US, en;q=0.5',
}

# Chrome options shared by every driver instance: run without a visible window.
# The command-line flag is used instead of the `headless` attribute because the
# flag is understood by Chrome regardless of the installed Selenium release.
options = webdriver.ChromeOptions()
options.add_argument("--headless")
# Column accumulators: extractor() appends one entry to each list per product,
# so index i of every list describes the same product.
title = []
price = []
ratings = []
desc = []


def extractor(url):
    """Scrape every product linked from one search-result page.

    The listing page is loaded in Chrome (using the module-level ``options``)
    and parsed for product tiles.  Each product page is then fetched with
    ``requests`` and its title, price, ratings and description are appended
    to the module-level ``title``, ``price``, ``ratings`` and ``desc`` lists.

    Products that cannot be fetched or that lack any of the four fields are
    reported and skipped as a whole, so the four lists always keep the same
    length.

    Args:
        url: Address of the search-result page to process.
    """
    # NOTE(review): `executable_path` is kept as in the original call; it looks
    # like newer Selenium releases expect a Service object instead - confirm
    # against the installed version.
    driver = webdriver.Chrome(executable_path='chromedriver.exe', options=options)
    try:
        driver.get(url)
        web_page = sp(driver.page_source, "lxml")
    finally:
        # Always shut the browser down, otherwise every page leaks a process.
        driver.quit()

    for link in web_page.find_all('li', attrs={'class': 'c2prKC'}):
        container = link.find('div', attrs={'class': "cRjKsc"})
        anchor = container.a if container is not None else None
        href = anchor.get('href') if anchor is not None else None
        if not href:
            # Tile without a product link: nothing to request.
            continue

        # Resolve relative or protocol-relative hrefs against the listing URL;
        # absolute URLs pass through unchanged.
        item = urljoin(url, href)
        print(f"[INFO]:Requesting at {item}")
        try:
            req = rqs.get(item, headers=HEADERS, timeout=30)
            req.raise_for_status()
            item_page = sp(req.content, 'html.parser')
            # find() returns None for a missing element, so `.text` raises
            # AttributeError, which is handled below.
            item_title = item_page.find('h2', attrs={'class': 'pdp-mod-product-badge-title'}).text
            item_price = item_page.find('span', attrs={'id': 'pdp-mod-product-price'}).text
            item_ratings = item_page.find('span', attrs={'class': 'pdp-block pdp-block__rating-questions'}).text
            item_description = item_page.find('div', attrs={'class': 'pdp-block module'}).text.replace("\n", "")
        except (rqs.RequestException, AttributeError) as exc:
            print(f"[WARN]:Skipping {item}: {exc!r}")
            continue

        # Append only after all four fields were read so the columns stay aligned.
        title.append(item_title)
        price.append(item_price)
        ratings.append(item_ratings)
        desc.append(item_description)


def scraper(url, pages, *, page_handler=None):
    """Process result pages 1..``pages`` of a search URL.

    Args:
        url: Search URL without a ``page`` parameter.
        pages: Number of result pages to visit; values below 1 visit nothing.
        page_handler: Optional callable invoked with each page URL.  Defaults
            to ``extractor``.
    """
    handler = extractor if page_handler is None else page_handler
    # Start the query string if the URL has none, otherwise extend it.
    separator = "&" if "?" in url else "?"
    for i in range(1, pages + 1):
        page_url = f"{url}{separator}page={i}"
        handler(page_url)
# Script entry point: scrape two result pages for the search query, then ask
# for a file name and save the collected columns as CSV.
if __name__ == "__main__":
    scraper(
        "https://www.daraz.pk/catalog/?q=dell laptops&_keyori=ss&from=input&spm=a2a0e.searchlist.search.go.371024723qw3Be",
        2,
    )
    df = pd.DataFrame({"Title": title, "Price": price, "Ratings": ratings, "Description": desc})
    file = input("Enter your file name: ")
    df.to_csv(f"{file}.csv", index=False)
    print("Congrats!!! you have scraped desired data.")
»’