首页 > 解决方案 > 将字典的字典转换为数据框

问题描述

我正在尝试通过 IEX API 循环代码符号列表,以获得大量代码的市账率。最终目标是在一个 csv 文件中包含所有的市账率。对于多个代码,它以我没有很多经验的字典形式返回资产负债表信息。

以下代码仅使用资产负债表返回位于数据框中的单个代码。

import pandas as pd
from iexfinance.stocks import Stock
from datetime import datetime
import matplotlib.pyplot as plt
from iexfinance.stocks import get_historical_data
from pathlib import Path
import numpy as np
import csv

output_file = pd.DataFrame()

filename = r'C:\Users########'

Ticker = []
with open(filename, newline='') as inputfile:
    for row in csv.reader(inputfile):
        Ticker.append(row[0])

for i in Ticker:
#Obtain information for ticker company when using a single ticker
#Ticker = ['FRT']

    #Obtaining Balance Sheets
    stock = Stock(Ticker, token='##################', output_format='pandas')
    bs = stock.get_balance_sheet()

    #Obtaining the common stock outstanding on the last filing
    common_stock_outstanding = stock.get_shares_outstanding()

    #Obtaining the current price
    current_price = stock.get_price()

    #Obtaining the Market Cap
    market_cap = common_stock_outstanding * current_price

    #Obtaining the BV per share
    bs['BV'] = bs['shareholderEquity'] / common_stock_outstanding
    bs.drop(bs.iloc[:, 0:38], inplace = True, axis = 1)
    BV = pd.DataFrame(bs)
    BV.columns = ['1']
    BV = BV.reset_index(drop=True)

    #Obtaining the Price to Book
    current_price = pd.DataFrame(current_price)
    current_price = current_price.reset_index(drop=True)
    current_price.columns = ['2']
    current_price

    PtB = pd.merge(BV, current_price, left_index=True, right_index=True)

    PtB['PtB'] = PtB['2'] / PtB['1']
    PtB.drop(PtB.iloc[:, 0:2], inplace = True, axis = 1)
    PtB.columns = [Ticker]

    #Appending the PtB into the Output File
    output_file = pd.DataFrame()
    output_file = output_file.append(PtB)
    output_file

这是两个 Ticker RFP 和 APA 的返回输出:

 {'RFP':             accountsPayable capitalSurplus  commonStock currency  \
 2021-03-01        251000000           None     82200000      USD   

             currentAssets  currentCash  currentLongTermDebt filingType  \
 2021-03-01      900000000    113000000              2000000       10-K   

             fiscalDate  fiscalQuarter  ...  symbol  totalAssets  \
 2021-03-01  2020-12-31              4  ...     RFP   3730000000   

             totalCurrentLiabilities  totalLiabilities  treasuryStock  \
 2021-03-01                380000000        2649000000     -174000000   

                        id  key     subkey           date        updated  
 2021-03-01  BALANCE_SHEET  RFP  quarterly  1609372800000  1614775188000  

 [1 rows x 38 columns],
 'APA':             accountsPayable capitalSurplus  commonStock currency  \
 2020-07-30        459000000           None  12006000000      USD   

             currentAssets  currentCash  currentLongTermDebt filingType  \
 2020-07-30     1658000000    135000000            294000000       10-Q   

             fiscalDate  fiscalQuarter  ...  symbol  totalAssets  \
 2020-07-30  2020-06-30              2  ...     APA  12999000000   

             totalCurrentLiabilities  totalLiabilities  treasuryStock  \
 2020-07-30               1710000000       14634000000    -3189000000   

                        id  key     subkey           date        updated  
 2020-07-30  BALANCE_SHEET  APA  quarterly  1593475200000  1608330865000  

 [1 rows x 38 columns]}

我尝试使用 pd.DataFrame.from_dict(bs, orient='index') 将字典的字典放入数据框中,但收到错误:

KeyError                                  Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2896             try:
-> 2897                 return self._engine.get_loc(key)
   2898             except KeyError:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 0

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-17-b29d66f7b6fd> in <module>
----> 1 pd.DataFrame.from_dict(bs, orient='index')

~\Anaconda3\lib\site-packages\pandas\core\frame.py in from_dict(cls, data, orient, dtype, columns)
   1188             raise ValueError("only recognize index or columns for orient")
   1189 
-> 1190         return cls(data, index=index, columns=columns, dtype=dtype)
   1191 
   1192     def to_numpy(self, dtype=None, copy=False):

 ~\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
    462                     mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
    463                 else:
--> 464                     mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
    465             else:
    466                 mgr = init_dict({}, index, columns, dtype=dtype)

 ~\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in init_ndarray(values, index, 
 columns, dtype, copy)
    169     # by definition an array here
    170     # the dtypes will be coerced to a single dtype
--> 171     values = prep_ndarray(values, copy=copy)
    172 
    173     if dtype is not None:

~\Anaconda3\lib\site-packages\pandas\core\internals\construction.py in prep_ndarray(values, copy)
    274         try:
    275             if is_list_like(values[0]) or hasattr(values[0], "len"):
--> 276                 values = np.array([convert(v) for v in values])
    277             elif isinstance(values[0], np.ndarray) and values[0].ndim == 0:
    278                 # GH#21861

~\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
    2978             if self.columns.nlevels > 1:
    2979                 return self._getitem_multilevel(key)
 -> 2980             indexer = self.columns.get_loc(key)
    2981             if is_integer(indexer):
    2982                 indexer = [indexer]

~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
    2897                 return self._engine.get_loc(key)
    2898             except KeyError:
 -> 2899                 return self._engine.get_loc(self._maybe_cast_indexer(key))
    2900         indexer = self.get_indexer([key], method=method, tolerance=tolerance)
    2901         if indexer.ndim > 1 or indexer.size > 1:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 0

我知道代码的构造可能效率低下,但此时我只想了解如何使用这个 dicts 输出的 dict。必须有办法做到这一点,但目前这超出了我的技能范围。谢谢你的帮助。

标签: pythonpandasdictionaryfor-loopelixir-iex

解决方案


在您提供您尝试从中创建数据框的确切字典之前,我认为我们无法为您提供帮助。无论如何,作为示例,请看以下示例:

>>data = {'name': {'first':'hary', 'second':'porter'}, 'address':{'temp': 'USA', 'perm': 'UK'}}
>>data
{'name': {'first': 'hary', 'second': 'porter'}, 'address': {'temp': 'USA', 'perm': 'UK'}}
>>pd.DataFrame(data)
          name address
first     hary     NaN
second  porter     NaN
temp       NaN     USA
perm       NaN      UK

推荐阅读