#python #pandas #dataframe #dictionary #data-analysis
Question:
I am trying to print the fundamental data returned from my TD Ameritrade API for every stock on the NYSE, NASDAQ, AMEX, and OTCBB. The problem is that as soon as I try to print it, I get this:
```
IOPub data rate exceeded.
The notebook server will temporarily stop sending output to the client
in order to avoid crashing it.
To change this limit, set the config variable `--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)
```
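The message points at the `--NotebookApp.iopub_data_rate_limit` setting, and I know I could raise it, e.g. in `jupyter_notebook_config.py` (a sketch only; `1e10` is just an arbitrarily large value, not a recommended one):

```python
# ~/.jupyter/jupyter_notebook_config.py
# (generated with `jupyter notebook --generate-config`)
c.NotebookApp.iopub_data_rate_limit = 1e10  # bytes/sec; the default is 1000000.0
```

But that only lets the notebook dump even more output; it doesn't help me understand the structure of the data.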
I am just trying to take a look at how the data comes back, so I know what kind of loop I need in order to build a dataframe containing only the data I want to insert into my database.
I don't think you will need to see all of my code to answer this question, but I have provided it below in case you have questions. That way all the answers are there and you can see exactly what I did from start to finish.
```python
import string
import time
import sqlite3 as sql

import requests
import pandas as pd
from bs4 import BeautifulSoup as bs

# API_KEY, BASE (the TD Ameritrade base url) and today_fmt are defined elsewhere in the notebook.


# This is an updated version that retrieves the company name and the symbol
def get_companies(exchange="NYSE"):
    """
    :param exchange: The stock exchange for which you want to get a current list
                     of all the symbols. Default -> NYSE
                     Create a list of every letter in the alphabet; each page starts
                     with a letter and contains the symbols that start with that letter.
    :param url: http://eoddata.com/stocklist/{}/{}.htm
    """
    alpha = list(string.ascii_uppercase)
    symbols = []
    name = []

    # loop through the letters in the alphabet to get the stocks on each page
    # from the table and store them in a list
    for each in alpha:
        url = "http://eoddata.com/stocklist/{}/{}.htm".format(exchange, each)
        resp = requests.get(url)
        site = resp.content
        soup = bs(site, 'html.parser')
        table = soup.find('table', {'class': 'quotes'})
        for row in table.findAll('tr')[1:]:
            symbols.append(row.findAll('td')[0].text.rstrip())
        for row in table.findAll('tr')[1:]:
            name.append(row.findAll('td')[1].text.rstrip())

    # remove the extra letters on the end of the symbols
    symbols_clean = []
    name_clean = []

    for each in symbols:
        each = each.replace('.', '-')
        symbols_clean.append(each.split('-')[0])

    for each in name:
        each = each.replace('.', '-')
        name_clean.append(each.split('-')[0])

    return name_clean, symbols_clean


# TD Ameritrade api has a limit to the number of symbols you can get data for
# at a time, so to get everything I have to chunk the list into 200 symbols at a time.
def chunks(l, n):
    """
    :param l: takes in a list
    :param n: Lets you know how long you want each chunk to be
    """
    n = max(1, n)
    return (l[i:i + n] for i in range(0, len(l), n))


# symbols_chunked = list(chunks(list(set(stocks)), 200))

def request_quotes(stocks):
    """
    :param stocks: makes an api call for a list of stock symbols
    :returns: a pandas dataframe of quote data
    """
    url = BASE + "marketdata/quotes"  # market data url
    params = {
        'apikey': API_KEY,
        'symbol': stocks
    }
    request = requests.get(url=url, params=params).json()

    time.sleep(1)  # set sleep so that api works

    return pd.DataFrame.from_dict(request, orient='index').reset_index(drop=True)


def insert_quote_data(quote_df):
    """
    :param quote_df: Quote Data Dataframe arg
    :return: (1) Either an Error or Exception, (2) Success Message!
    """
    try:
        conn = sql.connect("MarketDatabase.db")
        cur = conn.cursor()
        quote_df.to_sql(name='DailyQuoteData', con=conn, if_exists='append', index=False)
        conn.close()
        print("Data Insert Complete!")
    except:
        raise ValueError("Data not inserted correctly. Make sure it was a string object.")


def get_fundamentals(stocks):
    """
    :param stocks: List of stocks chunked into 200 symbol chunks
    :return: This will return tons of information that will then be changed
             into dataframes and inserted into the database.
    """
    url = BASE + "instruments"

    # pass params
    params = {
        'apikey': API_KEY,
        'symbol': stocks,
        'projection': 'fundamental'
    }
    request = requests.get(url=url, params=params).json()

    time.sleep(1)

    return pd.DataFrame.from_dict(request, orient='index').reset_index(drop=True)


NYSE_company, NYSE_symbol = get_companies(exchange="NYSE")
NASDAQ_company, NASDAQ_symbol = get_companies(exchange="NASDAQ")
AMEX_company, AMEX_symbol = get_companies(exchange="AMEX")
OTCBB_company, OTCBB_symbol = get_companies(exchange="OTCBB")
NYBOT_company, NYBOT_symbol = get_companies(exchange="NYBOT")

# Create dataframes that contain every publicly traded company on each exchange.
# This cell will just create the dataframes for each individual exchange.
# I will then use the data from this cell to merge them all into one big group.
# Then I will begin trying to get all the data I can using such a big dataset.

columns = ["exchange", "symbol", "name"]

# New York Stock Exchange companies
NYSE = list(zip(NYSE_symbol, NYSE_company))
NYSE = [("NYSE", ) + elem for elem in NYSE]
NYSE_df = pd.DataFrame([x for x in NYSE], columns=columns)

# NASDAQ companies
NASDAQ = list(zip(NASDAQ_symbol, NASDAQ_company))
NASDAQ = [("NASDAQ", ) + elem for elem in NASDAQ]
NASDAQ_df = pd.DataFrame([x for x in NASDAQ], columns=columns)

# American Stock Exchange companies
AMEX = list(zip(AMEX_symbol, AMEX_company))
AMEX = [("AMEX", ) + elem for elem in AMEX]
AMEX_df = pd.DataFrame([x for x in AMEX], columns=columns)

# Over the Counter Bulletin Board Exchange ("Pink Sheets").
# These are the penny stocks and I think there are a lot of
# possibilities for finding a niche in here.
OTCBB = list(zip(OTCBB_symbol, OTCBB_company))
OTCBB = [("OTCBB", ) + elem for elem in OTCBB]
OTCBB_df = pd.DataFrame([x for x in OTCBB], columns=columns)

# Now we append all the dataframes together so that we have one massive
# master list. Also, the good thing is we can still use the smaller
# datasets if need be.
companies_df = NYSE_df.append(NASDAQ_df)
companies_df = companies_df.append(AMEX_df)
companies_df = companies_df.append(OTCBB_df)

# Now check for duplicates and drop them from the main dataset
companies_df = companies_df.drop_duplicates()

# create the quote_df to pass to the already created functions
quote_df = companies_df

# Break the symbols up into chunks
companies_chunked = list(chunks(list(set(quote_df['symbol'])), 200))
NYSE_chunked = list(chunks(list(set(NYSE_df['symbol'])), 200))
NASDAQ_chunked = list(chunks(list(set(NASDAQ_df['symbol'])), 200))
AMEX_chunked = list(chunks(list(set(AMEX_df['symbol'])), 200))
OTCBB_chunked = list(chunks(list(set(OTCBB_df['symbol'])), 200))

#=====================================================================#

# Now I can loop through the chunked list of symbols and call the api,
# then append all the resulting dataframes into one.
NYSE_quote_df = pd.concat([request_quotes(each) for each in NYSE_chunked])
NASDAQ_quote_df = pd.concat([request_quotes(each) for each in NASDAQ_chunked])
AMEX_quote_df = pd.concat([request_quotes(each) for each in AMEX_chunked])
OTCBB_quote_df = pd.concat([request_quotes(each) for each in OTCBB_chunked])

# Combined exchanges company dataframe list
companies_quote_df = pd.concat([request_quotes(each) for each in companies_chunked])

# Now I need to add the dates and format the dates for the database
companies_quote_df['date'] = pd.to_datetime(today_fmt)
companies_quote_df['date'] = companies_quote_df['date'].dt.date
companies_quote_df['divDate'] = pd.to_datetime(companies_quote_df['divDate'])
companies_quote_df['divDate'] = companies_quote_df['divDate'].dt.date

# remove anything without a price
companies_quote_df = companies_quote_df.loc[companies_quote_df['bidPrice'] > 0]

# Rename columns; they can't start with a number
companies_quote_df = companies_quote_df.rename(columns={
    '52WkHigh': '_52WkHigh',
    '52WkLow': '_52WkLow'
})

# THIS INSERTS THE QUOTE DATA FOR ALL COMBINED EXCHANGES INTO MarketDatabase.db
insert_quote_data(companies_quote_df)

# GET THE FUNDAMENTAL DATA FOR THE COMBINED EXCHANGE LIST
combined_fun_df = pd.concat([get_fundamentals(each) for each in companies_chunked])

_fund_list = list(combined_fun_df['fundamental'])

print(_fund_list)
```
That is where I get the `IOPub data rate exceeded` notice. I tried writing a `for` loop with a delay so that all of the data would not try to display at once, but that didn't work. Does anyone know a function or snippet of code that can be used when this happens so that the `print()` statement works?
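If it helps, something like the sketch below (using the `_fund_list` built above) is the kind of sampling I can do without tripping the limit, but it only shows me a few records at a time:

```python
from pprint import pprint

# Look at just the first few records instead of printing everything at once,
# so the notebook never gets close to the IOPub rate limit.
for record in _fund_list[:3]:
    pprint(record)        # each record is one dict of fundamental data
    print('-' * 60)

print(len(_fund_list), "records in total")
```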
If I do a `print(combined_fun_df)` statement I receive:
```
                                          fundamental      cusip symbol  \
0   {'symbol': 'GANX', 'high52': 17.93, 'low52': 6...  36269B105   GANX
1   {'symbol': 'IJJP', 'high52': 0.008, 'low52': 0...  44963Q204   IJJP
2   {'symbol': 'NVLPF', 'high52': 0.81, 'low52': 0...  66980G109  NVLPF
3   {'symbol': 'PTIXW', 'high52': 0.7999, 'low52':...  74365N111  PTIXW
4   {'symbol': 'FJP', 'high52': 56.5524, 'low52': ...  33737J158    FJP
..                                                ...        ...    ...
26  {'symbol': 'LMDX', 'high52': 11.0857, 'low52':...  G5709L109   LMDX
27  {'symbol': 'YGYI', 'high52': 0.51, 'low52': 0....  987537206   YGYI
28  {'symbol': 'BAX', 'high52': 88.32, 'low52': 73...  071813109    BAX
29  {'symbol': 'HRCXF', 'high52': 0.08, 'low52': 0...  G4708G120  HRCXF
30  {'symbol': 'FFXXF', 'high52': 6.54, 'low52': 3...  42328X104  FFXXF

                                          description    exchange assetType
0              Gain Therapeutics, Inc. - Common Stock      NASDAQ    EQUITY
1                        IJJ Corporation Common Stock  Pink Sheet    EQUITY
2         NOVA LEAP HEALTH CORP Common Stock (Canada)  Pink Sheet    EQUITY
3             Protagenic Therapeutics, Inc. - Warrant      NASDAQ    EQUITY
4                     First Trust Japan AlphaDEX Fund      NASDAQ       ETF
..                                                ...         ...       ...
26                    LumiraDx Limited - Common stock      NASDAQ    EQUITY
27        Youngevity International, Inc. Common Stock  Pink Sheet    EQUITY
28             Baxter International Inc. Common Stock        NYSE    EQUITY
29  Hurricane Energy PLC, Ordinary Shares (United ...  Pink Sheet    EQUITY
30       HELIOS FAIRFAX PARTNERS CORP Ordinary Shares  Pink Sheet    EQUITY
```
But all I am interested in is the fundamental data. I want to create a dataframe from just the fundamental data and insert that into my database.
But when I do `print(combined_fun_df['fundamental'])` I only receive the individual dictionaries:
```
0     {'symbol': 'GANX', 'high52': 17.93, 'low52': 6...
1     {'symbol': 'IJJP', 'high52': 0.008, 'low52': 0...
2     {'symbol': 'NVLPF', 'high52': 0.81, 'low52': 0...
3     {'symbol': 'PTIXW', 'high52': 0.7999, 'low52':...
4     {'symbol': 'FJP', 'high52': 56.5524, 'low52': ...
                             ...
26    {'symbol': 'LMDX', 'high52': 11.0857, 'low52':...
27    {'symbol': 'YGYI', 'high52': 0.51, 'low52': 0....
28    {'symbol': 'BAX', 'high52': 88.32, 'low52': 73...
29    {'symbol': 'HRCXF', 'high52': 0.08, 'low52': 0...
30    {'symbol': 'FFXXF', 'high52': 6.54, 'low52': 3...
Name: fundamental, Length: 20152, dtype: object
```
So, how can I extract the fundamental data, take the keys and make them the column names of a dataframe, and then insert the values into that dataframe?
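To make the target concrete, this is roughly the shape of code I imagine the answer takes (a sketch only; it assumes every dict in the `fundamental` column shares the same keys, e.g. `symbol`, `high52`, `low52`):

```python
# Expand the column of dicts into its own dataframe:
# each dict key ('symbol', 'high52', 'low52', ...) becomes a column.
fundamentals_df = pd.json_normalize(combined_fun_df['fundamental'].tolist())

# (I think pd.DataFrame(list(combined_fun_df['fundamental'])) would be equivalent,
# but I don't know which is preferred.)

print(fundamentals_df.columns)
print(fundamentals_df.head())
```

Is something along those lines the right approach, or is there a better way to go from the dictionaries to database-ready columns?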