#python #selenium #google-chrome #selenium-webdriver #selenium-chromedriver
#python #selenium #google-chrome #selenium-webdriver #selenium-chromedriver
Вопрос:
Я пытаюсь собирать (парсить) данные с веб-сайтов с помощью Selenium. Приложение работает на Flask + uWSGI + nginx на экземпляре AWS EC2.
Код
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service

# Service wrapper pointing at the chromedriver binary used to launch Chrome.
service = Service("/opt/chromedriver")

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.3"
}

# Chrome launch flags shared by every scrape_data() call.
options = webdriver.ChromeOptions()
options.add_argument('--no-sandbox')
# Reuse the single user-agent definition instead of repeating the literal.
options.add_argument(f"user-agent={headers['User-Agent']}")
options.add_argument("--headless")
options.add_argument("--ignore-certificate-errors")
options.add_argument("--enable-javascript")
options.add_argument("--incognito")
options.add_argument("--disable-dev-shm-usage")


def scrape_data(URL):
    """Load *URL* in headless Chrome and return the page's HTML source.

    Args:
        URL: Address of the page to load.

    Returns:
        The value of ``driver.page_source`` after the page was loaded, or
        ``None`` when a ``WebDriverException`` occurred (for example when
        Chrome itself failed to start).
    """
    # Pre-bind both names so the failure path never touches an unbound local:
    # if webdriver.Chrome() raises, no driver object exists to quit and no
    # HTML was fetched.
    driver = None
    html_content = None
    try:
        driver = webdriver.Chrome(service=service, options=options)
        driver.get(URL)
        # NOTE(review): implicitly_wait configures the timeout for element
        # lookups; it does not pause before page_source is read. Confirm
        # whether an explicit wait was intended here.
        driver.implicitly_wait(2)
        html_content = driver.page_source
    except WebDriverException:
        print("Failed URL -->", URL)
    finally:
        # Always release the browser process, but only if one was started.
        if driver is not None:
            driver.quit()
    return html_content


url_x = input("Enter url : ")
raw_text = scrape_data(url_x)
При обращении к приложению через nginx/uWSGI я получаю ошибку, но при запуске того же кода из командной строки ошибок нет:
Traceback (most recent call last):
File "/home/ubuntu/ml_eval/./main.py", line 98, in scrape_data
uwsgi[45145]: File "/home/ubuntu/ml_eval/prjenv/lib/python3.8/site-packages/selenium/webdriver/chrome/webdriver.py", line 70, in __init__
uwsgi[45145]: driver = webdriver.Chrome(service=service, options=options)
uwsgi[45145]: super(WebDriver, self).__init__(DesiredCapabilities.CHROME['browserName'], "goog",
uwsgi[45145]: File "/home/ubuntu/ml_eval/prjenv/lib/python3.8/site-packages/selenium/webdriver/chromium/webdriver.py", line 93, in __init__
uwsgi[45145]: RemoteWebDriver.__init__(
uwsgi[45145]: File "/home/ubuntu/ml_eval/prjenv/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 268, in __init__
uwsgi[45145]: self.start_session(capabilities, browser_profile)
uwsgi[45145]: File "/home/ubuntu/ml_eval/prjenv/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 359, in start_session
uwsgi[45145]: response = self.execute(Command.NEW_SESSION, parameters)
uwsgi[45145]: File "/home/ubuntu/ml_eval/prjenv/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 424, in execute
uwsgi[45145]: self.error_handler.check_response(response)
uwsgi[45145]: File "/home/ubuntu/ml_eval/prjenv/lib/python3.8/site-packages/selenium/webdriver/remote/errorhandler.py", line 247, in check_response
uwsgi[45145]: raise exception_class(message, screen, stacktrace)
uwsgi[45145]: selenium.common.exceptions.WebDriverException: Message: unknown error: Chrome failed to start: exited abnormally.
uwsgi[45145]: (unknown error: DevToolsActivePort file doesn't exist)
uwsgi[45145]: (The process started from chrome location /usr/bin/google-chrome is no longer running, so ChromeDriver is assuming that Chrome has crashed.)
uwsgi[45145]: Stacktrace:
uwsgi[45145]: #0 0x55765c077ee3 <unknown>
Как это можно исправить?
**Операционная система : ubuntu 20.04
Python 3.8.10
ChromeDriver 96.0.4664.45 (76e4c1bb2ab4671b8beba3444e61c0f17584b2fc-refs/branch-heads/4664@{#947})
Google Chrome 96.0.4664.93
Расположение Google chrome : /usr/bin/google-chrome**
Ответ №1:
Добавьте `:/usr/bin:/bin`
в переменную окружения PATH, как показано ниже:
Environment="PATH=/home/artem/env/bin:/usr/bin:/bin"
Полный файл выглядит следующим образом:
$ sudo cat /etc/systemd/system/ml_eval.service
[Unit]
Description=uWSGI instance to serve flask app
After=network.target

[Service]
User=ubuntu
Group=www-data
WorkingDirectory=/home/ubuntu/mleval
Environment="PATH=/home/ubuntu/mleval/prjenv/bin:/usr/bin:/bin"
ExecStart=/home/ubuntu/mleval/prjenv/bin/uwsgi --ini mleval.ini

[Install]
WantedBy=multi-user.target