#python #pandas #selenium #selenium-webdriver
Question:
In my project I am trying to build a few web scrapers to get all the data about football players of a given nationality. The problem is that, although I do collect all the links for the countries and the players, my code stops after reaching the 10th country and throws an error, even though the link has already been retrieved. Here is my code:
from time import sleep
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from selenium.webdriver.chrome.options import Options
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
options = Options()
# Creating our players DataFrame
sofa_score_players = pd.DataFrame(columns=['name', 'nationality', 'age', 'height', 'preferred_foot',
                                           'team', 'position', 'number', 'highest_transfert_fee'])
# We specify our Chrome driver path
path = "C:/Users/Al4D1N/Documents/ChromeDriver_webscraping/chromedriver.exe"
driver = webdriver.Chrome(options=options, executable_path=path)
url = "https://www.sofascore.com/football/rankings/fifa"
driver.get(url)
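# NOTE: find_elements returns live references tied to the current DOM;
# they can go stale if the page re-renders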
footballCountries = driver.find_elements_by_css_selector("div.styles__RankingsItemContainer-coiowh-0.hvNSrf")
ignored_exceptions = (NoSuchElementException, StaleElementReferenceException,)
# waiting_issue = WebDriverWait(driver, 3, ignored_exceptions=ignored_exceptions).until(expected_conditions.presence_of_element_located((By.CSS_SELECTOR, footballCountries)))
for footballCountry in footballCountries:
    sleep(2)
    # waiting_issue = WebDriverWait(driver, 3, ignored_exceptions=ignored_exceptions).until(
    #     expected_conditions.presence_of_element_located((By.CSS_SELECTOR, footballCountries)))
    # In each element, we select the <a> tags
    atags = footballCountry.find_elements_by_css_selector('a')
    for atag in atags:
        # In each <a> tag, select the href
        href = atag.get_attribute('href')
        print(href)
        # Open a new window
        driver.execute_script("window.open('');")
        driver.switch_to.window(driver.window_handles[1])
        driver.get(href)
        # sleep(2)
        # Get the <a> tags for the country's players
        boxplayers = driver.find_elements_by_css_selector('a.componentStyles__CardWrapper-sc-8o3g86-0.cAhval')
        for player in boxplayers:
            playerlink = player.get_attribute('href')
            print(playerlink)
            # Open a new window
            driver.execute_script("window.open('');")
            driver.switch_to.window(driver.window_handles[2])
            driver.get(playerlink)
            # sleep(2)
            # Get the player details
            player_details = driver.find_elements_by_css_selector('h2.styles__DetailBoxTitle-sc-1ss54tr-11.gMYPyy')
            player_source_details = driver.find_elements_by_css_selector('div.Content-sc-1o55eay-0.gYsVZh')
            player_nationality = player_details[0].text
            player_age = player_details[1].text
            player_height = player_details[2].text
            player_preferred_foot = player_details[3].text
            player_position = player_details[4].text
            player_number = player_details[5].text
            player_name = player_source_details[0].text
            player_team = player_source_details[2].text
            player_highest_transfert_fee = driver.find_element_by_css_selector('div.Content-sc-1o55eay-0.gYsVZh').text
            # Now we store them in our DataFrame
            sofa_score_players = sofa_score_players.append({'name': player_name, 'nationality': player_nationality,
                                                            'age': player_age, 'height': player_height,
                                                            'preferred_foot': player_preferred_foot,
                                                            'team': player_team,
                                                            'position': player_position, 'number': player_number,
                                                            'highest_transfert_fee': player_highest_transfert_fee},
                                                           ignore_index=True)
            # Close the player tab
            driver.close()
            # Switch back to the country tab
            driver.switch_to.window(driver.window_handles[1])
        # Close the country tab
        driver.close()
        # Switch back to the main rankings tab
        driver.switch_to.window(driver.window_handles[0])
driver.close()
# We store our dataframe in an excel file to be easily readable
sofa_score_players.to_excel('Sofa_Score_Players.xlsx', index=False)
Since this code takes a long time to finish, is there any way I can make sure it goes through all the links I collected at the beginning?
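One idea I had is to collect every href as a plain string up front, before navigating anywhere, so the loops never touch a WebElement after the page has changed. A rough, untested sketch of what I mean (reusing the same selectors and the same driver as above):

# Rough sketch, untested: gather plain string URLs first, then navigate.
# Assumes `driver` is already open on the rankings page, as above.
country_links = []
for country in driver.find_elements_by_css_selector("div.styles__RankingsItemContainer-coiowh-0.hvNSrf"):
    for atag in country.find_elements_by_css_selector('a'):
        country_links.append(atag.get_attribute('href'))

player_links = []
for href in country_links:
    driver.get(href)  # plain navigation, no extra windows needed
    sleep(2)
    for player in driver.find_elements_by_css_selector('a.componentStyles__CardWrapper-sc-8o3g86-0.cAhval'):
        player_links.append(player.get_attribute('href'))

for playerlink in player_links:
    driver.get(playerlink)
    sleep(2)
    # ... scrape player_details / player_source_details exactly as above ...

That way, even if the rankings page re-renders, the rest of the run only depends on the strings collected once at the start. Would that be a reliable way to make sure every link gets visited?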