loops - 行迭代(两个循环)自动化 Pandas DataFrame
问题描述
根据 df1 中的指标“H”和“G”,我对每一行都有以下迭代(for 循环)。使用选定指标的乘积创建一个新列。现在我希望所有指标都自动下降(如果我有超过“H”和“G”)。不幸的是,我正在努力将其放入字典中。
有人能帮忙吗?谢谢您,祝您度过愉快的一周。
df1 =pd.DataFrame({'Country':['Armenia','Azerbaidjan','Belarus','Armenia','Azerbaidjan','Belarus'],\
'Indictaor':['G','G','G','H', 'H', 'H'],'2005':[3,4,5,6,7,4],'2006':[6,3,1,3,5,6]})
df2 = pd.DataFrame({'Year':[2005,2006,2005,2006],
'Country1':['Armenia','Armenia','Azerbaidjan','Azerbaidjan'],
'Country2': ['Belarus','Belarus','Belarus','Belarus']})
df3 = pd.DataFrame({'Year':[2005,2006,2005,2006],
'Country2': ['Belarus','Belarus','Belarus','Belarus'],
'Country1':['Armenia','Armenia','Azerbaidjan','Azerbaidjan'],
'IndictaorGProduct':[15,6,35,5],
'IndictaorHProduct':[24,18,28,30]})
gprod = []
hprod =[]
for row in df4.iterrows() :
c1 = row[1][2]
c2 = row[1][1]
yr = str(row[1][0])
g1 = df1.loc[(df1['Country']==c1)&(df1['Indictaor']=='G')]
g1val = g1[yr].values[0]
g2 = df1.loc[(df1['Country']==c2)&(df1['Indictaor']=='G')]
g2val = g2[yr].values[0]
print(g1val, g2val, g1val*g2val)
gprod.append(g1val*g2val)
df4['GProduct'] = gprod
for row in df4.iterrows() :
c1 = row[1][2]
c2 = row[1][1]
yr = str(row[1][0])
g1 = df1.loc[(df1['Country']==c1)&(df1['Indictaor']=='H')]
g1val = g1[yr].values[0]
g2 = df1.loc[(df1['Country']==c2)&(df1['Indictaor']=='H')]
g2val = g2[yr].values[0]
print(g1val, g2val, g1val*g2val)
gprod.append(g1val*g2val)
df4['HProduct'] = hprod
解决方案
这取决于您从哪里获得指标。您决定它们还是从列中获取它们?
如果您从相应的列中获取它们,您可以使用该列从该列中获取具有唯一值的列表。然后你可以在第二个循环中循环这些值。但请注意,根据您的数据大小,这可能不是很有效。
但是,您可以这样做:
import pandas as pd
df1 = pd.DataFrame({'Country': ['Armenia', 'Azerbaidjan', 'Belarus', 'Armenia', 'Azerbaidjan', 'Belarus'], \
'Indictaor': ['G', 'G', 'G', 'H', 'H', 'H'], '2005': [3, 4, 5, 6, 7, 4],
'2006': [6, 3, 1, 3, 5, 6]})
df2 = pd.DataFrame({'Year': [2005, 2006, 2005, 2006],
'Country1': ['Armenia', 'Armenia', 'Azerbaidjan', 'Azerbaidjan'],
'Country2': ['Belarus', 'Belarus', 'Belarus', 'Belarus']})
df3 = pd.DataFrame({'Year': [2005, 2006, 2005, 2006],
'Country2': ['Belarus', 'Belarus', 'Belarus', 'Belarus'],
'Country1': ['Armenia', 'Armenia', 'Azerbaidjan', 'Azerbaidjan'],
'IndictaorGProduct': [15, 6, 35, 5],
'IndictaorHProduct': [24, 18, 28, 30]})
cols = ['Year', 'Country2', 'Country1']
df4 = pd.DataFrame(columns=cols)
df4['Year'] = df2['Year']
df4['Country1'] = df2['Country1']
df4['Country2'] = df2['Country2']
indicators = df1["Indictaor"].unique() # get all the unique indicators from the indicators column, you could also manually have alist with the indicators you want to loop over
for i in indicators:
prod = []
for row in df4.iterrows():
c1 = row[1][2]
c2 = row[1][1]
yr = str(row[1][0])
g1 = df1.loc[(df1['Country'] == c1) & (df1['Indictaor'] == i)] # compare to the indicator in the list
g1val = g1[yr].values[0]
g2 = df1.loc[(df1['Country'] == c2) & (df1['Indictaor'] == i)]
g2val = g2[yr].values[0]
print(g1val, g2val, g1val * g2val)
prod.append(g1val * g2val)
colname = "".join([i,"Product"])
df4[colname] = prod
print("Done")