#python-3.x #parsing #web-scraping #app-store
#python-3.x #парсинг #веб-скрапинг #app-store
Вопрос:
Вот что я пробовал, и это не сработало:
Сначала `pip3 install app-store-scraper`, а затем:
# Reproduction script from the question: scrape App Store reviews for one app.
from app_store_scraper import AppStore
from pprint import pprint
# Target the "us" storefront entry of the app with id 946099227.
# The traceback below shows the ConnectionError surfacing from this constructor call.
appstore_app = AppStore(country="us", app_name="yazio-fasting-food-tracker", app_id=946099227)
# NOTE(review): the traceback below shows review(how_many=1000) in the executed cell,
# so the posted snippet and the code that was actually run differ slightly.
appstore_app.review()
# Print the collected reviews and then their count.
pprint(appstore_app.reviews)
pprint(appstore_app.reviews_count)
Ошибка:
---------------------------------------------------------------------------
gaierror Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/urllib3/connection.py in _new_conn(self)
159 conn = connection.create_connection(
--> 160 (self._dns_host, self.port), self.timeout, **extra_kw
161 )
/usr/local/lib/python3.7/dist-packages/urllib3/util/connection.py in create_connection(address, timeout, source_address, socket_options)
60
---> 61 for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
62 af, socktype, proto, canonname, sa = res
/usr/lib/python3.7/socket.py in getaddrinfo(host, port, family, type, proto, flags)
747 addrlist = []
--> 748 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
749 af, socktype, proto, canonname, sa = res
gaierror: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
676 headers=headers,
--> 677 chunked=chunked,
678 )
/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
380 try:
--> 381 self._validate_conn(conn)
382 except (SocketTimeout, BaseSSLError) as e:
/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py in _validate_conn(self, conn)
975 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
--> 976 conn.connect()
977
/usr/local/lib/python3.7/dist-packages/urllib3/connection.py in connect(self)
307 # Add certificate verification
--> 308 conn = self._new_conn()
309 hostname = self.host
/usr/local/lib/python3.7/dist-packages/urllib3/connection.py in _new_conn(self)
171 raise NewConnectionError(
--> 172 self, "Failed to establish a new connection: %s" % e
173 )
NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f1a3eb18390>: Failed to establish a new connection: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
448 retries=self.max_retries,
--> 449 timeout=timeout
450 )
/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
724 retries = retries.increment(
--> 725 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
726 )
/usr/local/lib/python3.7/dist-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
438 if new_retry.is_exhausted():
--> 439 raise MaxRetryError(_pool, url, error or ResponseError(cause))
440
MaxRetryError: HTTPSConnectionPool(host='apps.apple.com', port=443): Max retries exceeded with url: /us/app/yazio-fasting-food-tracker/id946099227 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f1a3eb18390>: Failed to establish a new connection: [Errno -2] Name or service not known'))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-4-c7ec3a01ece6> in <module>
2 from pprint import pprint
3
----> 4 appstore_app = AppStore(country="us", app_name="yazio-fasting-food-tracker", app_id=946099227)
5 appstore_app.review(how_many=1000)
6
~/.local/lib/python3.7/site-packages/app_store_scraper/app_store.py in __init__(self, country, app_name, app_id, log_format, log_level, log_interval)
27 log_format=log_format,
28 log_level=log_level,
---> 29 log_interval=log_interval,
30 )
31
~/.local/lib/python3.7/site-packages/app_store_scraper/base.py in __init__(self, country, app_name, app_id, log_format, log_level, log_interval)
62 self._request_headers = {
63 "Accept": "application/json",
---> 64 "Authorization": self._token(),
65 "Connection": "keep-alive",
66 "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
~/.local/lib/python3.7/site-packages/app_store_scraper/base.py in _token(self)
126
127 def _token(self):
--> 128 self._get(self.url)
129 tags = self._response.text.splitlines()
130 for tag in tags:
~/.local/lib/python3.7/site-packages/app_store_scraper/base.py in _get(self, url, headers, params, total, backoff_factor, status_forcelist)
123 s.mount(self._base_request_url, HTTPAdapter(max_retries=retries))
124 logger.debug(f"Making a GET request: {url}")
--> 125 self._response = s.get(url, headers=headers, params=params)
126
127 def _token(self):
/usr/local/lib/python3.7/dist-packages/requests/sessions.py in get(self, url, **kwargs)
541
542 kwargs.setdefault('allow_redirects', True)
--> 543 return self.request('GET', url, **kwargs)
544
545 def options(self, url, **kwargs):
/usr/local/lib/python3.7/dist-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
528 }
529 send_kwargs.update(settings)
--> 530 resp = self.send(prep, **send_kwargs)
531
532 return resp
/usr/local/lib/python3.7/dist-packages/requests/sessions.py in send(self, request, **kwargs)
641
642 # Send the request
--> 643 r = adapter.send(request, **kwargs)
644
645 # Total elapsed time of the request (approximately)
/usr/local/lib/python3.7/dist-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
514 raise SSLError(e, request=request)
515
--> 516 raise ConnectionError(e, request=request)
517
518 except ClosedPoolError as e:
ConnectionError: HTTPSConnectionPool(host='apps.apple.com', port=443): Max retries exceeded with url: /us/app/yazio-fasting-food-tracker/id946099227 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f1a3eb18390>: Failed to establish a new connection: [Errno -2] Name or service not known'))
Пожалуйста, посоветуйте, как справиться с этой ошибкой, или, может быть, есть другой способ парсинга отзывов App Store.
Комментарии:
1. Дважды проверьте подключение к Интернету. В вашем коде нет ничего плохого. Для меня это работает как есть.
2. @baduker, я получаю это:
2020-10-02 18:01:47,362 [INFO] Base - Initialised: AppStore('us', 'yazio-fasting-food-tracker', 946099227)
2020-10-02 18:01:47,365 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/us/app/yazio-fasting-food-tracker/id946099227
2020-10-02 18:01:47,684 [ERROR] Base - Something went wrong: 'data'
2020-10-02 18:01:47,686 [INFO] Base - [id:946099227] Fetched 0 reviews (0 fetched in total)
3. Вы выполняете код, который указан в вопросе? В коде, который у вас здесь, нет `data`. Кроме того, вы пробовали парсить App Store раньше? Может быть, ваш IP-адрес заблокирован?
4. Это то, что я получаю:
2020-10-02 17:05:32,555 [INFO] Base - Initialised: AppStore('us', 'yazio-fasting-food-tracker', 946099227)
2020-10-02 17:05:32,555 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/us/app/yazio-fasting-food-tracker/id946099227
2020-10-02 17:05:34,891 [INFO] Base - [id:946099227] Fetched 534 reviews (534 fetched in total)
5. Вау, это не то, что я вижу
Ответ №1:
Я немного поэкспериментировал с этой обёрткой над API, и похоже, что результаты, которые вы получаете, либо зависят от IP-адреса, либо в самой обёртке происходит что-то странное.
В любом случае, вот код, с помощью которого я получил 533 отзыва. Весь дамп находится в pastebin.
import json
from app_store_scraper import AppStore
from pprint import pprint

# Fetch the reviews of the app from the "us" storefront and print how many were collected.
appstore_app = AppStore(country="us", app_name="yazio-fasting-food-tracker", app_id=946099227)
appstore_app.review()
reviews = appstore_app.reviews
pprint(appstore_app.reviews_count)

# Convert each review's date to an ISO-format string before dumping;
# presumably the dates are datetime objects, which json cannot serialise directly.
for review in reviews:
    review['date'] = review['date'].isoformat()

# Write the whole review list to disk; the context manager closes the file.
with open("data_dump.json", "w") as dd:
    json.dump(reviews, dd, indent=4, sort_keys=True)
Это выводит:
2020-10-03 18:28:35,477 [INFO] Base - Initialised: AppStore('us', 'yazio-fasting-food-tracker', 946099227)
2020-10-03 18:28:35,477 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/us/app/yazio-fasting-food-tracker/id946099227
2020-10-03 18:28:40,681 [INFO] Base - [id:946099227] Fetched 260 reviews (260 fetched in total)
533
2020-10-03 18:28:46,415 [INFO] Base - [id:946099227] Fetched 533 reviews (533 fetched in total)