Как исправить превышение скорости передачи данных Jupyter Notbeook IOPub

#python #pandas #dataframe #dictionary #data-analysis

#питон #панды #фрейм данных #словарь #анализ данных

Вопрос:

Я пытаюсь распечатать фундаментальные данные, полученные из моего api TD Ameritrade, по каждой акции на NYSE, NASDAQ, AMEX и OTCBB. Проблема в том, что как только я попытаюсь распечатать его, я получу это:

 IOPub data rate exceeded. The notebook server will temporarily stop sending output to the client in order to avoid crashing it. To change this limit, set the config variable `--NotebookApp.iopub_data_rate_limit`.  Current values: NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec) NotebookApp.rate_limit_window=3.0 (secs)   

Я просто пытаюсь взглянуть на то, как возвращаются данные, чтобы я знал, какой цикл мне нужен, чтобы создать фрейм данных, содержащий только те данные, которые я хочу вставить в свою базу данных.

Я не думаю, что вам нужно будет видеть весь мой код, чтобы ответить на этот вопрос, но я предоставил его ниже на случай, если у вас возникнут вопросы. Таким образом, все ответы есть, и вы можете точно видеть, что я делал от начала до начала.

 # This is an updated version that retrieves the company name and the symbol  def get_companies(exchange="NYSE"):  """  :param exchange: The Stock exchange for which you want   to get a current list of all the symbols for.  Default -gt; NYSE    create a list of every letter in the alphabet  Each page starts with a letter and contains the   symbols that start with that letter.   :param url: http://eoddata.com/stocklist/{}/{}.htm  """  alpha = list(string.ascii_uppercase)    symbols = []  name = []    # loop through the letters in the alphabet to get the stocks on each page   # from the table and store them in a list   for each in alpha:  url = "http://eoddata.com/stocklist/{}/{}.htm".format(exchange, each)  resp = requests.get(url)  site = resp.content  soup = bs(site, 'html.parser')  table = soup.find('table', {'class':'quotes'})  for row in table.findAll('tr')[1:]:  symbols.append(row.findAll('td')[0].text.rstrip())  for row in table.findAll('tr')[1:]:  name.append(row.findAll('td')[1].text.rstrip())    # remove the extra letters on the end of the symbols   symbols_clean = []  name_clean = []    for each in symbols:  each = each.replace('.','-')  symbols_clean.append((each.split('-')[0]))    for each in name:  each = each.replace('.','-')  name_clean.append((each.split('-')[0]))    return name_clean, symbols_clean  # TD Ameritrade api has a limit to the number of symbols you can get data for # at a time. So to get everything I have to chunk the list into 200 symbols at a time def chunks(l, n):  """  :param l: takes in a list  :param n: Lets you know how long you want each chunk to be  """  n = max(1, n)  return(l[i:i n] for i in range(0, len(l), n))  # symbols_chunked = list(chunks(list(set(stocks)), 200))   def request_quotes(stocks):  """  :param stocks: makes an api call for a list of stock symbols  :returns: a pandas dataframe of quote data   """  url = BASE   "marketdata/quotes" # market data url  params = {  'apikey': API_KEY,  'symbol': stocks  }  request = requests.get(  url=url,  params=params  ).json()    time.sleep(1) # set sleep so that api works    return pd.DataFrame.from_dict(request, orient='index').reset_index(drop=True)  def insert_quote_data(quote_df):  """  :param df: Quote Data Dataframe arg  :return: (1) Either an Error or Exception, (2) Success Message!  """  try:  conn = sql.connect("MarketDatabase.db")  cur = conn.cursor()  quote_df.to_sql(name='DailyQuoteData', con=conn, if_exists='append', index=False)  conn.close()  print("Data Insert Complete!")  except:  raise ValueError("Data not inserted correctly. Make sure it was a string object.")  def get_fundamentals(stocks):  """  :param stocks: List of stocks chuncked into 200 symbol chunks     :return: This will return tons of information that will then  be changed into dataframes and inserted into the database.   """  url = BASE   "instruments"    # pass params   params = {  'apikey': API_KEY,  'symbol': stocks,   'projection': 'fundamental'  }    request = requests.get(  url=url,  params=params  ).json()    time.sleep(1)    return pd.DataFrame.from_dict(request, orient='index').reset_index(drop='True')  NYSE_company, NYSE_symbol = get_companies(exchange="NYSE") NASDAQ_company, NASDAQ_symbol = get_companies(exchange="NASDAQ") AMEX_company, AMEX_symbol = get_companies(exchange="AMEX") OTCBB_company, OTCBB_symbol = get_companies(exchange="OTCBB") NYBOT_company, NYBOT_symbol = get_companies(exchange="NYBOT")  # Create dataframes that contain every publicly traded company # on each exchange.   # This cell will just create the dataframes for each individual exchange.  # I will then use the data from this cell to merge them all into one  # big group. Then I will begin trying to get all the data I  # can using such a big dataset.   columns = ["exchange", "symbol", "name"]  # New York Stock Exchange companies NYSE = list(zip(NYSE_symbol, NYSE_company)) NYSE = [("NYSE", )   elem for elem in NYSE] NYSE_df = pd.DataFrame([x for x in NYSE], columns=columns)  # NASDAQ Companies NASDAQ = list(zip(NASDAQ_symbol, NASDAQ_company)) NASDAQ = [("NASDAQ", )   elem for elem in NASDAQ] NASDAQ_df = pd.DataFrame([x for x in NASDAQ], columns=columns)  # American Stock Exchange Companies AMEX = list(zip(AMEX_symbol, AMEX_company)) AMEX = [("AMEX", )   elem for elem in AMEX] AMEX_df = pd.DataFrame([x for x in AMEX], columns=columns)  # Over the Counter Bulletin Board Exchange "Pink Sheets" # These are the penny stocks and I think their is a lot of  # possibilities with finding a niche in here  OTCBB = list(zip(OTCBB_symbol, OTCBB_company)) OTCBB = [("OTCBB", )   elem for elem in OTCBB] OTCBB_df = pd.DataFrame([x for x in OTCBB], columns=columns)  # Now we append all the dataframes together so that we have  # one massive master list. Also, the good think is we can still  # use the smaller datasets if need be.   companies_df = NYSE_df.append(NASDAQ_df) companies_df = companies_df.append(AMEX_df) companies_df = companies_df.append(OTCBB_df)  # Now check for duplicates and drop them from the main dataset  companies_df = companies_df.drop_duplicates()  # create the quote_df to pass to the already created functions quote_df = companies_df  # Break the symbols up into chunks  companies_chunked = list(chunks(list(set(quote_df['symbol'])), 200)) NYSE_chunked = list(chunks(list(set(NYSE_df['symbol'])), 200))  NASDAQ_chunked = list(chunks(list(set(NASDAQ_df['symbol'])), 200))  AMEX_chunked = list(chunks(list(set(AMEX_df['symbol'])), 200))  OTCBB_chunked = list(chunks(list(set(OTCBB_df['symbol'])), 200))  #=====================================================================# # Now I can loop through the chuncked list of symbols and call the api.  # Then append all the resulting dataframes into one.  NYSE_quote_df = pd.concat([request_quotes(each) for each in NYSE_chunked]) NASDAQ_quote_df = pd.concat([request_quotes(each) for each in NASDAQ_chunked]) AMEX_quote_df = pd.concat([request_quotes(each) for each in AMEX_chunked]) OTCBB_quote_df = pd.concat([request_quotes(each) for each in OTCBB_chunked])  # Combined exchanges company dataframe list companies_quote_df = pd.concat([request_quotes(each) for each in companies_chunked])  # Now I need to add the dates and format the dates for the database  companies_quote_df['date'] = pd.to_datetime(today_fmt) companies_quote_df['date'] = companies_quote_df['date'].dt.date companies_quote_df['divDate'] = pd.to_datetime(companies_quote_df['divDate']) companies_quote_df['divDate'] = companies_quote_df['divDate'].dt.date   # remove anything without a price  companies_quote_df = companies_quote_df.loc[companies_quote_df['bidPrice'] gt; 0]  # Rename columns, They can't start with a number  companies_quote_df = companies_quote_df.rename(columns={  '52WkHigh':'_52WkHigh',  '52WkLow': '_52WkLow' })  # THIS INSERTS THE QUOTE DATA FOR ALL COMBINED EXCHANGES INTO THE MarketDatabase.db insert_quote_data(companies_quote_df)  # GET THE FUNDAMENTAL DATA FOR COMBINED EXCHANGE LIST combined_fun_df = pd.concat([get_fundamentals(each) for each in companies_chunked])  _fund_list = list(combined_fun_df['fundamental']) print(_fund_list)  

Вот где я получаю IOPub data rate exceeded уведомление. Я попытался сделать for loop и создать задержку, чтобы все данные не пытались отображаться сразу, но это не сработало. Кто-нибудь знает функцию или фрагмент кода, который можно использовать, когда это произойдет, чтобы заставить print() оператор работать?

If I do a print(combined_fun_df) statement I receive:

 fundamental cusip symbol  0 {'symbol': 'GANX', 'high52': 17.93, 'low52': 6... 36269B105 GANX  1 {'symbol': 'IJJP', 'high52': 0.008, 'low52': 0... 44963Q204 IJJP  2 {'symbol': 'NVLPF', 'high52': 0.81, 'low52': 0... 66980G109 NVLPF  3 {'symbol': 'PTIXW', 'high52': 0.7999, 'low52':... 74365N111 PTIXW  4 {'symbol': 'FJP', 'high52': 56.5524, 'low52': ... 33737J158 FJP  .. ... ... ...  26 {'symbol': 'LMDX', 'high52': 11.0857, 'low52':... G5709L109 LMDX  27 {'symbol': 'YGYI', 'high52': 0.51, 'low52': 0.... 987537206 YGYI  28 {'symbol': 'BAX', 'high52': 88.32, 'low52': 73... 071813109 BAX  29 {'symbol': 'HRCXF', 'high52': 0.08, 'low52': 0... G4708G120 HRCXF  30 {'symbol': 'FFXXF', 'high52': 6.54, 'low52': 3... 42328X104 FFXXF    description exchange assetType  0 Gain Therapeutics, Inc. - Common Stock NASDAQ EQUITY  1 IJJ Corporation Common Stock Pink Sheet EQUITY  2 NOVA LEAP HEALTH CORP Common Stock (Canada) Pink Sheet EQUITY  3 Protagenic Therapeutics, Inc. - Warrant NASDAQ EQUITY  4 First Trust Japan AlphaDEX Fund NASDAQ ETF  .. ... ... ...  26 LumiraDx Limited - Common stock NASDAQ EQUITY  27 Youngevity International, Inc. Common Stock Pink Sheet EQUITY  28 Baxter International Inc. Common Stock NYSE EQUITY  29 Hurricane Energy PLC, Ordinary Shares (United ... Pink Sheet EQUITY  30 HELIOS FAIRFAX PARTNERS CORP Ordinary Shares Pink Sheet EQUITY   
  • But all I am interested in is the fundamental data. I want to create a dataframe for the fundamental data and insert that into my database.
  • But when I do print(combined_fun_df['fundamental'] I only receive individual dictionaries:
 0 {'symbol': 'GANX', 'high52': 17.93, 'low52': 6... 1 {'symbol': 'IJJP', 'high52': 0.008, 'low52': 0... 2 {'symbol': 'NVLPF', 'high52': 0.81, 'low52': 0... 3 {'symbol': 'PTIXW', 'high52': 0.7999, 'low52':... 4 {'symbol': 'FJP', 'high52': 56.5524, 'low52': ...  ...  26 {'symbol': 'LMDX', 'high52': 11.0857, 'low52':... 27 {'symbol': 'YGYI', 'high52': 0.51, 'low52': 0.... 28 {'symbol': 'BAX', 'high52': 88.32, 'low52': 73... 29 {'symbol': 'HRCXF', 'high52': 0.08, 'low52': 0... 30 {'symbol': 'FFXXF', 'high52': 6.54, 'low52': 3... Name: fundamental, Length: 20152, dtype: object  

Итак, как я могу извлечь основные данные, взять «ключи» и сделать их именем столбца фрейма данных, а затем вставить значения в фрейм данных?