#python #selenium #dynamic #beautifulsoup #pagination
#python #selenium #динамический #beautifulsoup #пагинация
Вопрос:
Хорошо, итак, вот чего я пытаюсь добиться:
1. Открыть URL со списком динамически отфильтрованных результатов поиска.
2. Нажать на первый результат поиска (5 на страницу).
3. Собрать заголовки, абзацы и изображения и сохранить их как объект JSON в отдельном файле, например:
{
"Title": "Элемент заголовка отдельной записи",
"Content": "Абзацы и изображения отдельной записи в порядке DOM"
}
4. Вернуться на страницу обзора результатов поиска и повторить шаги 2–3.
5. После сбора данных для 5/5 результатов перейти на следующую страницу (нажать ссылку пагинации).
6. Повторять шаги 2–5, пока не останется ни одной записи.
Чтобы еще раз визуализировать, что подразумевается:
Что у меня пока есть, так это:
# Import libraries
from selenium import webdriver
from bs4 import BeautifulSoup

# URL of the search results overview page
url = "https://URL.com"

# Create a browser session
driver = webdriver.Chrome("PATH TO chromedriver.exe")
driver.implicitly_wait(30)
driver.get(url)

# Click the consent button on the destination URL (it overlays the rest of the content)
python_consentButton = driver.find_element_by_id('acceptAllCookies')
python_consentButton.click()  # click cookie consent btn

# Selenium hands the page source to Beautiful Soup
soup_results_overview = BeautifulSoup(driver.page_source, 'lxml')

for link in soup_results_overview.findAll("a", class_="searchResults__detail"):
    # Selenium visits each search result page.
    # NOTE: `link` is not used in the loop body; the same class-name lookup
    # is repeated on every iteration (this is the behaviour the question asks about).
    searchResult = driver.find_element_by_class_name('searchResults__detail')
    searchResult.click()  # click Search Result

    # Ask Selenium to go back to the search results overview page
    driver.back()

# Tell Selenium to click the paginate "next" link
# (probably needs to be in a surrounding for loop?)
paginate = driver.find_element_by_class_name('pagination-link-next')
paginate.click()  # click paginate next

driver.quit()
Проблема
Счётчик списка сбрасывается каждый раз, когда Selenium возвращается на страницу обзора результатов поиска, поэтому он 5 раз нажимает на первую запись, переходит к следующим 5 элементам и останавливается.
Вероятно, это типичный случай для рекурсивного подхода, но я не уверен.
Буду благодарен за любые советы о том, как решить эту проблему.
Комментарии:
1. Можете ли вы поделиться URL-адресом?
2. Да, конечно, почему бы и нет: cst.com/solutions#size=5&TemplateName=Application Article
3. @derp: Пожалуйста, попробуйте мое решение. надеюсь, это поможет.
Ответ №1:
Для сбора данных можно обойтись только requests и BeautifulSoup, без Selenium. Это будет намного быстрее и потребует гораздо меньше ресурсов:
import json
import requests
from bs4 import BeautifulSoup
# Ask the OData endpoint for up to 1000 "Application Article" entries,
# ordered by article date (descending).
params = {
    "$filter": "TemplateName eq 'Application Article'",
    "$orderby": "ArticleDate desc",
    "$top": "1000",
    "$inlinecount": "allpages",
}
response = requests.get("https://www.cst.com/odata/Articles", params=params).json()

# Article titles are used as file names below; replace characters that are
# not valid in file names on common file systems so open() does not fail.
_UNSAFE_FILENAME_CHARS = str.maketrans({c: "_" for c in '\\/:*?"<>|'})

# Iterate over the returned articles
articles = response["value"]
for article in articles:
    article_json = {}
    article_content = []

    # Title of the article as reported by the API
    article_title = article["Title"]
    # The "Url" field is "|"-separated; the second part is used as the article link
    article_url = str(article["Url"]).split("|")[1]
    print(article_title)

    # Request the article page and parse it
    article_page = requests.get(article_url).text
    page = BeautifulSoup(article_page, "html.parser")

    # Header of the article page
    header = page.select_one("h1.head--bordered").text
    article_json["Title"] = str(header).strip()

    # Body content (paragraphs, images, image descriptions, emphasised text) in DOM order
    content = page.select("section.content p, section.content img, section.content span.imageDescription, "
                          "section.content em")

    # Collect the content: image elements become URLs, everything else plain text
    for x in content:
        if x.name == "img":
            article_content.append("https://cst.com/solutions/article/" + x.attrs["src"])
        else:
            article_content.append(x.text)

    article_json["Content"] = article_content

    # Write one JSON file per article, named after the (sanitized) title
    file_name = article_json["Title"].translate(_UNSAFE_FILENAME_CHARS)
    with open(f"{file_name}.json", "w", encoding="utf-8") as to_json_file:
        to_json_file.write(json.dumps(article_json))

print("the end")
Комментарии:
1. Приведенный выше ответ — это четкий, экономный и структурированный подход, который охватывает все, что было запрошено изначально. Запросы OData для меня довольно новы, что весьма полезно, особенно в этом случае.
Ответ №2:
У меня есть для вас одно решение: извлеките значение href ссылки, а затем выполните driver.get(url).
Вместо этого:
for link in soup_results_overview.findAll("a", class_="searchResults__detail"):
    # Selenium visits each search result page
    searchResult = driver.find_element_by_class_name('searchResults__detail')
    searchResult.click()  # click Search Result

    # Ask Selenium to go back to the search results overview page
    driver.back()
Попробуйте это.
for link in soup_results_overview.findAll("a", class_="searchResults__detail"):
    # Navigate by the link's own URL instead of clicking, so every
    # iteration opens the result that `link` refers to.
    print(link['href'])
    driver.get(link['href'])
    driver.back()
Здесь я вывожу URL-адрес перед переходом по нему:
https://cst.com/solutions/article/sar-spherical-phantom-model
https://cst.com/solutions/article/pin-fed-four-edges-gap-coupled-microstrip-antenna-magus
https://cst.com/solutions/article/printed-self-matched-normal-mode-helix-antenna-antenna-magus
https://cst.com/solutions/article/broadband-characterization-of-launchers
https://cst.com/solutions/article/modal-analysis-of-a-dielectric-2-port-filter
Ответ №3:
Это решение переходит по каждой ссылке, извлекает заголовок и абзацы, сохраняет URL-адреса изображений и загружает все изображения на компьютер в виде файлов .png:
from bs4 import BeautifulSoup as soup
import requests, re
from selenium import webdriver
def scrape_page(_d, _link):
    """Extract the title, paragraph texts and image sources from a parsed page.

    Image sources that end in ``<name>.ashx`` are additionally downloaded from
    ``_link + src`` and written to ``electroresults/<name>.png``.

    :param _d: parsed document exposing ``find`` and ``find_all``
        (a BeautifulSoup object in this script).
    :param _link: URL of the page; each image source is appended to it to
        build the download URL.
    :return: tuple ``(title text, list of paragraph texts, list of image src values)``.
    """
    import os  # local import: only needed to create the output directory

    _head = _d.find('h1', {'class': 'head--bordered'}).text
    _paras = [i.text for i in _d.find_all('p')]
    images = [i.img['src'] for i in _d.find_all('a', {'class': 'fancybox'})]
    for img in images:
        # Only sources ending in "<word chars>.ashx" are stored on disk.
        _url = re.findall(r"\w+\.ashx$", img)
        if not _url:
            continue
        # Download only what will actually be saved.
        _result = requests.get(f'{_link}{img}').content
        os.makedirs('electroresults', exist_ok=True)
        # Strip the ".ashx" suffix (5 characters) to build the file name.
        with open('electroresults/{}.png'.format(_url[0][:-5]), 'wb') as f:
            f.write(_result)
    return _head, _paras, images
d = webdriver.Chrome('/path/to/chromedriver')
d.get('https://www.cst.com/solutions#size=5&TemplateName=Application Article')
# One list of scraped article dicts per results page
results = []
while True:
    # Collect the article URLs up front so that navigating away from the
    # overview page does not invalidate the elements being iterated.
    _articles = [i.get_attribute('href') for i in d.find_elements_by_class_name('searchResults__detail')]
    page_results = []
    # Remember the overview URL so we can return to it after visiting the articles
    previous = d.current_url
    for article in _articles:
        d.get(article)
        # Best effort: click the first element with class "interaction" if there is one
        # (presumably an overlay that has to be dismissed — TODO confirm).
        _interaction = d.find_elements_by_class_name('interaction')
        if _interaction:
            try:
                _interaction[0].click()
            except Exception as exc:
                print(f'Could not click the "interaction" element on {article}: {exc!r}')
        page_results.append(dict(zip(['title', 'paragraphs', 'imgs'], scrape_page(soup(d.page_source, 'html.parser'), d.current_url))))
    results.append(page_results)
    d.get(previous)
    # Advance to the next results page; stop when there is no "next" link
    _next = d.find_elements_by_class_name('pagination-link-next')
    if _next:
        _next[0].click()
    else:
        break
Вывод (только первые несколько статей на первой странице, из-за ограничения символов SO):
[{'title': 'n Predicting SAR Behavior using Spherical Phantom Modelsn ', 'paragraphs': ['', 'nAntenna Magus is a software tool to help accelerate the antenna design and modelling process. It increases efficiency by helping the engineer to make a more informed choice of antenna element, providing a good starting design.n', '', '', 'n IdEM is a user friendly tool for the generation of macromodels of linear lumped multi-port structures (e.g., via fields, connectors, packages, discontinuities, etc.), known from their input-output port responses. The raw characterization of the structure can come from measurement or simulation, either in frequency domain or in time domain.n ', '', '', 'n FEST3D is a software tool capable of analysing complex passive microwave components based on waveguide technology (including multiplexers, couplers and filters) in very short computational times with high accuracy. This suite offers all the capabilities needed for the design of passive components such as optimization and tolerance analysis. Moreover, FEST3D advanced synthesis tools allow designing bandpass, dual-mode and lowpass filters from user specifications.n ', '', '', 'n SPARK3D is a unique simulation tool for determining the RF breakdown power level in a wide variety of passive devices, including those based on cavities, waveguides, microstrip and antennas. Field results from CST STUDIO SUITE® simulations can be imported directly into SPARK3D to analyse vacuum breakdown (multipactor) and gas discharge. From this, SPARK3D calculates the maximum power that the device can handle without causing discharge effects.n ', '', '', 'nEasy-to-use matching circuit optimization and antenna analysis softwaren Optenni Lab is a professional software tool with innovative analysis features to increase the productivity of engineers requiring matching circuits. It can, e.g., speed up the antenna design process and provide antennas with optimal total performance. 
Optenni Lab offers fast fully-automatic matching circuit optimization tools, including automatic generation of multiple optimal topologies, estimation of the obtainable bandwidth of antennas and calculation of the worst-case isolation in multi-antenna systems.n ', '', '', 'n The ability to visualize electromagnetic fields intuitively in 3D and also the possibility to demonstrate in a straightforward way the effect of parameter changes are obvious benefits in teaching. To support learning, teaching and research at academic institutions, CST offers four types of licenses, namely the free CST STUDIO SUITE®Student Edition, a Classroom license, an Educational license and an Extended license. n ', '', '', 'n The CST STUDIO SUITE® Student Edition has been developed with the aim of introducing you to the world of electromagnetic simulation, making Maxwell’s equations easier to understand than ever.n ', '', '', 'n Below you will find several examples which were selected from some commonly used textbooks. Each example contains a short description of the theory, detailed information on how to construct the model, a video showing how to construct the model, and the fully constructed model ready for you to download.n ', '', '', 'n In acknowledgement of the importance of university research and the impact of groundbreaking publications on the reputation of both author and tool used for the research, CST announces the endowment of a University Publication Award.n ', '', '', "n Regular training courses are held in CST's offices in Asia, Europe, and North America. Please check the local websites for detail of trainings in China, Korea and Japan. Advance registration is normally required.n ", '', '', 'nCST exhibits at events around the globe. 
See a list of exhibitions CST is attending where you can speak to our sales and support staff and learn more about our products and their applications.n', '', '', 'nThroughout the year, CST simulation experts present eSeminars on the applications, features and usage of our software. You can also view past eSeminars by searching our archive and filtering for the markets or industries that interest you most.nn', '', '', 'n CST hosts workshops in multiple languages and in countries around the world. Workshops provide an opportunity to learn about specific applications and refresh your skills with experienced CST support staff.n ', '', '', 'n The CST user conference offers an informal and enlightening environment where developers and researchers using CST STUDIO SUITE® tools can exchange ideas and talk with CST staff about future developments.n ', '', 'facebooklinkedinswymtwitteryoutuberss', 'Events', 'Due to the fact that measurements in true biological heads typically cannot be carried out, SAR norms for mobile phones or EMI problems are commonly defined in terms of standardized phantom models. In the easiest case, only spherical structures are considered. To predict the SAR behavior of a new product already during the design stage, it is desirable to include the phantom head in the EM simulations. ', 'The following examplesxa0investigate two spherical phantom models, a basic one that only contains of tissue material inside a glass sphere and a more complex one that has twoxa0additional layers of bone and tissue.xa0xa0A dipole antenna is used for the excitation andxa0is displayed as a yellow line in the following picture.', 'The SAR distribution is simulated at 835 MHz and visualized in the figure below. 
A comparison of the SAR values over a radial line shows good agreement with the measurement of the same structure.', 'For the following simulation a more complex model including a simplified skull is used.', 'A comparison of the SAR values at 1.95 GHz on an off-axis path showsxa0a significant difference between the basic homogeneous model and the more complex one. Since the values are higher, the simplified model may not be sufficient in all cases.', ' Go to Article', ' Go to Article', ' Go to Article', ' Go to Article', ' Go to Article', 'n Please read ourn Privacy Statementxa0|xa0n Impressum xa0|xa0n Sitemap xa0|xa0n © 2019 Dassault Systemes Deutschland GmbH. All rights reserved.n ', 'Your session has expired. Redirecting you to the login page...', 'n We use cookie to operate this website, improve its usability, personalize your experience, and track visits. By continuing to use this site, you are consenting to use of cookies. You have the possibility to manage the parameters and choose whether to accept certain cookies while on the site. For more information, please read our updated privacy policyn', 'When you browse our website, cookies are enabled by default and data may be read or stored locally on your device. You can set your preferences below:', 'These cookies enable additional functionality like saving preferences, allowing social interactions and analyzing usage for site optimization.', 'These cookies enable us and third parties to serve ads that are relevant to your interests.'], 'imgs': ['~/media/B692C95635564BBDA18AFE7C35D3CC7E.ashx', '~/media/DC7423B9D92542CF8254365D9C83C9E7.ashx', '~/media/54E5C0BE872B411EBDC1698E19894670.ashx', '~/media/114789FC714042A89019C5E41E64ADEE.ashx', '~/media/B9AF3151613C44D2BFE1B5B9B6504885.ashx']}, {'title': 'n Pin-fed Four Edges Gap Coupled Microstrip Antenna | Antenna Magusn ', 'paragraphs': ['', 'nAntenna Magus is a software tool to help accelerate the antenna design and modelling process. 
It increases efficiency by helping the engineer to make a more informed choice of antenna element, providing a good starting design.n', '', '', 'n IdEM is a user friendly tool for the generation of macromodels of linear lumped multi-port structures (e.g., via fields, connectors, packages, discontinuities, etc.), known from their input-output port responses. The raw characterization of the structure can come from measurement or simulation, either in frequency domain or in time domain.n ', '', '', 'n FEST3D is a software tool capable of analysing complex passive microwave components based on waveguide technology (including multiplexers, couplers and filters) in very short computational times with high accuracy. This suite offers all the capabilities needed for the design of passive components such as optimization and tolerance analysis. Moreover, FEST3D advanced synthesis tools allow designing bandpass, dual-mode and lowpass filters from user specifications.n ', '', '', 'n SPARK3D is a unique simulation tool for determining the RF breakdown power level in a wide variety of passive devices, including those based on cavities, waveguides, microstrip and antennas. Field results from CST STUDIO SUITE® simulations can be imported directly into SPARK3D to analyse vacuum breakdown (multipactor) and gas discharge. From this, SPARK3D calculates the maximum power that the device can handle without causing discharge effects.n ', '', '', 'nEasy-to-use matching circuit optimization and antenna analysis softwaren Optenni Lab is a professional software tool with innovative analysis features to increase the productivity of engineers requiring matching circuits. It can, e.g., speed up the antenna design process and provide antennas with optimal total performance. 
Optenni Lab offers fast fully-automatic matching circuit optimization tools, including automatic generation of multiple optimal topologies, estimation of the obtainable bandwidth of antennas and calculation of the worst-case isolation in multi-antenna systems.n ', '', '', 'n The ability to visualize electromagnetic fields intuitively in 3D and also the possibility to demonstrate in a straightforward way the effect of parameter changes are obvious benefits in teaching. To support learning, teaching and research at academic institutions, CST offers four types of licenses, namely the free CST STUDIO SUITE®Student Edition, a Classroom license, an Educational license and an Extended license. n ', '', '', 'n The CST STUDIO SUITE® Student Edition has been developed with the aim of introducing you to the world of electromagnetic simulation, making Maxwell’s equations easier to understand than ever.n ', '', '', 'n Below you will find several examples which were selected from some commonly used textbooks. Each example contains a short description of the theory, detailed information on how to construct the model, a video showing how to construct the model, and the fully constructed model ready for you to download.n ', '', '', 'n In acknowledgement of the importance of university research and the impact of groundbreaking publications on the reputation of both author and tool used for the research, CST announces the endowment of a University Publication Award.n ', '', '', "n Regular training courses are held in CST's offices in Asia, Europe, and North America. Please check the local websites for detail of trainings in China, Korea and Japan. Advance registration is normally required.n ", '', '', 'nCST exhibits at events around the globe. 
See a list of exhibitions CST is attending where you can speak to our sales and support staff and learn more about our products and their applications.n', '', '', 'nThroughout the year, CST simulation experts present eSeminars on the applications, features and usage of our software. You can also view past eSeminars by searching our archive and filtering for the markets or industries that interest you most.nn', '', '', 'n CST hosts workshops in multiple languages and in countries around the world. Workshops provide an opportunity to learn about specific applications and refresh your skills with experienced CST support staff.n ', '', '', 'n The CST user conference offers an informal and enlightening environment where developers and researchers using CST STUDIO SUITE® tools can exchange ideas and talk with CST staff about future developments.n ', '', 'facebooklinkedinswymtwitteryoutuberss', 'Events', 'Although microstrip antennas are very popular in the microwave frequency range because of their simplicity and compatibility with circuit board technology, their limited bandwidth often restricts their usefulness.', 'Various methods have been suggested to overcome this limitation – including the use of gap- or direct-coupled parasitic patches. In the FEGCOMA, these parasitic patches are placed alongside all four edges of the driven patch element. The introduction of parasitic patches of slightly different resonant lengths yields further resonances improving the bandwidth and gain of the standard patch. In this case, the structure is optimized to obtain a well-defined, designable bandwidth with near-optimally spaced zeros. Typical gain values of 10 dBi may be expected, with a designable fractional impedance bandwidth between 12 % and 30 %....', '', ' Go to Article', ' Go to Article', ' Go to Article', ' Go to Article', ' Go to Article', 'n Please read ourn Privacy Statementxa0|xa0n Impressum xa0|xa0n Sitemap xa0|xa0n © 2019 Dassault Systemes Deutschland GmbH. 
All rights reserved.n ', 'Your session has expired. Redirecting you to the login page...', 'n We use cookie to operate this website, improve its usability, personalize your experience, and track visits. By continuing to use this site, you are consenting to use of cookies. You have the possibility to manage the parameters and choose whether to accept certain cookies while on the site. For more information, please read our updated privacy policyn', 'When you browse our website, cookies are enabled by default and data may be read or stored locally on your device. You can set your preferences below:', 'These cookies enable additional functionality like saving preferences, allowing social interactions and analyzing usage for site optimization.', 'These cookies enable us and third parties to serve ads that are relevant to your interests.'], 'imgs': ['http://www.antennamagus.com/database/antennas/341/Patch_FEGCOMA_Pin_small.png', 'http://www.antennamagus.com/images/Newsletter2019-0/FEGCOMA_3D_with_plus.png', 'http://www.antennamagus.com/images/Newsletter2019-0/FEGCOMA_s11_with_plus.png']}, {'title': 'n Printed Self-Matched Normal Mode Helix Antenna | Antenna Magusn ', 'paragraphs': ['', 'nAntenna Magus is a software tool to help accelerate the antenna design and modelling process. It increases efficiency by helping the engineer to make a more informed choice of antenna element, providing a good starting design.n', '', '', 'n IdEM is a user friendly tool for the generation of macromodels of linear lumped multi-port structures (e.g., via fields, connectors, packages, discontinuities, etc.), known from their input-output port responses. 
The raw characterization of the structure can come from measurement or simulation, either in frequency domain or in time domain.n ', '', '', 'n FEST3D is a software tool capable of analysing complex passive microwave components based on waveguide technology (including multiplexers, couplers and filters) in very short computational times with high accuracy. This suite offers all the capabilities needed for the design of passive components such as optimization and tolerance analysis. Moreover, FEST3D advanced synthesis tools allow designing bandpass, dual-mode and lowpass filters from user specifications.n ', '', '', 'n SPARK3D is a unique simulation tool for determining the RF breakdown power level in a wide variety of passive devices, including those based on cavities, waveguides, microstrip and antennas. Field results from CST STUDIO SUITE® simulations can be imported directly into SPARK3D to analyse vacuum breakdown (multipactor) and gas discharge. From this, SPARK3D calculates the maximum power that the device can handle without causing discharge effects.n ', '', '', 'nEasy-to-use matching circuit optimization and antenna analysis softwaren Optenni Lab is a professional software tool with innovative analysis features to increase the productivity of engineers requiring matching circuits. It can, e.g., speed up the antenna design process and provide antennas with optimal total performance. Optenni Lab offers fast fully-automatic matching circuit optimization tools, including automatic generation of multiple optimal topologies, estimation of the obtainable bandwidth of antennas and calculation of the worst-case isolation in multi-antenna systems.n ', '', '', 'n The ability to visualize electromagnetic fields intuitively in 3D and also the possibility to demonstrate in a straightforward way the effect of parameter changes are obvious benefits in teaching. 
To support learning, teaching and research at academic institutions, CST offers four types of licenses, namely the free CST STUDIO SUITE®Student Edition, a Classroom license, an Educational license and an Extended license. n ', '', '', 'n The CST STUDIO SUITE® Student Edition has been developed with the aim of introducing you to the world of electromagnetic simulation, making Maxwell’s equations easier to understand than ever.n ', '', '', 'n Below you will find several examples which were selected from some commonly used textbooks. Each example contains a short description of the theory, detailed information on how to construct the model, a video showing how to construct the model, and the fully constructed model ready for you to download.n ', '', '', 'n In acknowledgement of the importance of university research and the impact of groundbreaking publications on the reputation of both author and tool used for the research, CST announces the endowment of a University Publication Award.n ', '', '', "n Regular training courses are held in CST's offices in Asia, Europe, and North America. Please check the local websites for detail of trainings in China, Korea and Japan. Advance registration is normally required.n ", '', '', 'nCST exhibits at events around the globe. See a list of exhibitions CST is attending where you can speak to our sales and support staff and learn more about our products and their applications.n', '', '', 'nThroughout the year, CST simulation experts present eSeminars on the applications, features and usage of our software. You can also view past eSeminars by searching our archive and filtering for the markets or industries that interest you most.nn', '', '', 'n CST hosts workshops in multiple languages and in countries around the world. 
Workshops provide an opportunity to learn about specific applications and refresh your skills with experienced CST support staff.n ', '', '', 'n The CST user conference offers an informal and enlightening environment where developers and researchers using CST STUDIO SUITE® tools can exchange ideas and talk with CST staff about future developments.n ', '', 'facebooklinkedinswymtwitteryoutuberss', 'Events', 'Normal mode helix antennas (NMHA) are often used for handheld radio transceivers and mobile communications applications. The printed self-matched NMHA is naturally matched to 50 Ω, thus avoiding the typical design challenge of matching similar structures at resonance.', 'It exhibits properties similar to other NMHAs, namely: It is compact (with the total height being typically 0.14 λ), it is vertically polarized and omni-directional and has a bandwidth of approximately 3%.', 'The helical structure consists of two (inner and outer) metallic helical strips of equal width, with a central dielectric section between them.', ' Go to Article', ' Go to Article', ' Go to Article', ' Go to Article', ' Go to Article', 'n Please read ourn Privacy Statementxa0|xa0n Impressum xa0|xa0n Sitemap xa0|xa0n © 2019 Dassault Systemes Deutschland GmbH. All rights reserved.n ', 'Your session has expired. Redirecting you to the login page...', 'n We use cookie to operate this website, improve its usability, personalize your experience, and track visits. By continuing to use this site, you are consenting to use of cookies. You have the possibility to manage the parameters and choose whether to accept certain cookies while on the site. For more information, please read our updated privacy policyn', 'When you browse our website, cookies are enabled by default and data may be read or stored locally on your device. 
You can set your preferences below:', 'These cookies enable additional functionality like saving preferences, allowing social interactions and analyzing usage for site optimization.', 'These cookies enable us and third parties to serve ads that are relevant to your interests.'], 'imgs': ['http://www.antennamagus.com/database/antennas/342/Printed_Matched_NMHA_small.png', 'http://www.antennamagus.com/images/Newsletter2019-0/NMHA_3D_Farfield_with_plus.png', 'http://www.antennamagus.com/images/Newsletter2019-0/NMHA_2D_sketch_with_plus.png', 'http://www.antennamagus.com/images/Newsletter2019-0/NMHA_S11vsFrequency_with_plus.png']}]
Ответ №4:
Приведённый ниже код задаёт количество результатов на странице равным 20 и вычисляет количество страниц результатов. Затем нажимается ссылка «Далее», пока не будут посещены все страницы. Добавлено условие, гарантирующее, что страница загрузилась. Я вывожу статьи только для того, чтобы показать вам разные страницы. Вы можете использовать эту структуру для получения желаемого результата.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import math
startUrl = 'https://www.cst.com/solutions#size=20&TemplateName=Application Article'
url = 'https://www.cst.com/solutions#size=20&TemplateName=Application Article&page={}'

driver = webdriver.Chrome()
driver.get(startUrl)
driver.find_element_by_id('acceptAllCookies').click()

# Wait (up to 10 s) until the search results are present
items = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".searchResults__detail")))

# The counter text has the form "<N> items were found"; keep only the number
resultCount = int(driver.find_element_by_css_selector('[data-bind="text: resultsCount()"]').text.replace('items were found', '').strip())
resultsPerPage = 20
numPages = math.ceil(resultCount / resultsPerPage)
currentCount = resultsPerPage

# Heading of the first result; used to detect when the next page has replaced the current one
header = driver.find_element_by_css_selector('.searchResults__detail h3').text
test = header

for page in range(1, numPages + 1):
    if page == 1:
        print([item.text for item in items])
        # do something with first page
    else:
        driver.find_element_by_css_selector('.pagination-link-next').click()
        # Poll until the first heading differs from the previous page's heading
        while header == test:
            try:
                header = driver.find_element_by_css_selector('.searchResults__detail h3').text
            except Exception:
                # The element may be unavailable while the page is updating; retry
                continue

        items = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".searchResults__detail")))
        test = header
        # do something with next page
        print([item.text for item in items])
    if page == 4:  # delete later
        break  # delete later
Ответ №5:
Вы не используете свою переменную link нигде в своем цикле, просто указывая драйверу найти верхнюю ссылку и щелкнуть по ней. (Когда вы используете единственный селектор find_element и есть несколько результатов, selenium просто выбирает первый). Я думаю, все, что вам нужно сделать, это заменить эти строки
# Lines from the question's loop: the element is looked up by class name and then clicked.
searchResult = driver.find_element_by_class_name('searchResults__detail')
searchResult.click()
на
# NOTE(review): in the question's loop `link` is produced by BeautifulSoup, not Selenium;
# the comments below report that this call fails with a TypeError — confirm before using.
link.click()
Помогает ли это?
ХОРОШО .. что касается разбивки на страницы, вы могли бы использовать следующую стратегию, поскольку кнопка «Далее» исчезает:
paginate = driver.find_element_by_class_name('pagination-link-next')
# Keep going for as long as the "next" button is displayed
while paginate.is_displayed():
    for link in soup_results_overview.findAll("a", class_="searchResults__detail"):
        # Selenium visits each search result page
        # NOTE(review): `searchResult` is not defined in this snippet — presumably
        # it is looked up as in the question's code; confirm.
        searchResult.click()  # click Search Result

        # Scrape the form with a function defined elsewhere
        scrape()

        # Ask Selenium to go back to the search results overview page
        driver.back()

    # Click the pagination button after the for loop finishes on each page
    paginate.click()
Комментарии:
1. Это не работает, возвращается ошибка: TypeError: 'NoneType' object is not callable
2. Ну, это меня смущает. На самом деле я сейчас нахожусь на мобильном устройстве, но вскоре я проверю это в реальном браузере. Тем временем у меня глупый вопрос: вы подтвердили, что ссылки найдены вашим soup_results_overview?
3. Теперь, когда я думаю об этом, зачем вам для этого нужен beautiful soup? Кажется, что вы могли бы просто найти ссылки с помощью selenium и не беспокоиться о полном источнике страницы. По общему признанию, я не знаком с beautifulsoup, но, похоже, selenium мог бы выполнить эту работу.