首页 > 解决方案 > 从数据框中筛选变量并绘图

问题描述

我有一个类似于以下的数据框:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import matplotlib as mpl
sns.set()
# Sample data: 100 rows with three randomly chosen categorical keys
# (scenario, region, variable) followed by one column of standard-normal
# noise per year, 2015 through 2021. Year column names are strings.
df = pd.DataFrame(dict(
    scenario=np.random.choice(['BAU', 'ETS', 'ESD'], 100),
    region=np.random.choice(['Italy', 'France'], 100),
    variable=np.random.choice(
        ['GDP', 'GHG', 'PROD01', 'PROD02', 'PROD03', 'PROD04'], 100),
    # The random draws happen in column order: keys first, then the years.
    **{str(year): np.random.randn(100) for year in range(2015, 2022)},
))

我定义了一个绘图函数,用于绘制某个变量在各年份的取值:

def name_plot(df, scenario, region, variable):
    """Draw one scenario's values as a bar chart and save it as a PNG.

    The rows are selected with ``df.loc[(scenario, region, variable)]``;
    the index of the selection is used for the x axis and its ``'value'``
    column for the bar heights.  (Presumably ``df`` is indexed by
    scenario/region/variable/year -- verify against the caller.)

    The figure is written to ``'<region> <variable> .png'`` and then
    cleared.  The file name does not contain ``scenario``, so calling this
    for several scenarios with the same region and variable overwrites the
    same file each time.
    """
    key = (scenario, region, variable)
    selection = df.loc[key]
    stem = f'{region} {variable} '

    plt.bar(selection.index, selection['value'], label=scenario)
    plt.title(stem)
    plt.legend()

    plt.tight_layout()
    plt.savefig(f'{stem}.png')
    # Clear the current figure so the next plot starts from a blank canvas.
    plt.clf()

我重新排列数据并计算百分比变化:

# Wide -> long: every year column becomes a row, with the column name stored
# in 'year' and the number in melt's default 'value' column.
df2 = pd.melt(
    df,
    id_vars=['scenario', 'region', 'variable'],
    var_name='year',
)
all_names_index = (
    df2.set_index(['scenario', 'region', 'variable', 'year']).sort_index()
)

# Percentage deviation of every row from the 'BAU' rows:
# (x - BAU) / BAU * 100.
_bau = all_names_index.loc['BAU']
percent = all_names_index.subtract(_bau).div(_bau).mul(100)

# Rebuild the four-level index on the result.
percent = percent.reset_index().set_index(
    ['scenario', 'region', 'variable', 'year']
)

最后我绘制:

### ITALY PLOTS
# Alternative scenario lists (left commented out):
#names = ['BAU', 'ETS', 'ESD', 'ETSALL', 'ETSFFS']
#names = ['BAU']
names = ['BAU', 'ETS', 'ESD']

for scenario in names:
    # Label the axes BEFORE calling name_plot: name_plot saves the figure and
    # then clears it, so labels set after the call would miss the image saved
    # in this iteration and only leak into the next iteration's axes.
    plt.xlabel('Years')
    plt.ylabel('% change')
    name_plot(percent, scenario, 'Italy', 'GHG')
# Leave a clean figure behind for whatever is plotted next.
plt.clf()

现在,我想只针对 2021 年创建一个条形图,以比较同一变量在不同情景下的取值。类似下面这样(其中的数据是手动定义的):

# data to plot: three hand-written series with one value per category
n_groups = 7
means_ref = (-2.7, -0.1, -0.4, -8.6, -2.1, -1.1, 1.1)
means_tris = (-2.3, 0.6, 1.2, -8.3, -1.2, -4.2, 22.9)
means_cuneo = (-1.8, 0.7, 2.2, -8.0, -0.5, -0.3, 0.3)


# create plot
fig, ax = plt.subplots()
index = np.arange(n_groups)   # x position of the first bar in each group
bar_width = 0.3               # bar width, also the x offset of the 2nd series
bar_width2 = 0.6              # x offset of the 3rd series
width = .0                    # x offset of the tick marks relative to index
opacity = 0.8

rects1 = plt.bar(index, means_ref, bar_width,
                 alpha=opacity,
                 color='b',
                 label='a', align='center')

rects2 = plt.bar(index + bar_width, means_tris, bar_width,
                 alpha=opacity,
                 color='g',
                 label='b', align='center')

rects3 = plt.bar(index + bar_width2, means_cuneo, bar_width,
                 alpha=opacity,
                 color='r',
                 label='c', align='center')

plt.ylabel('% change wrt 2015')
plt.title('Production')
# Set tick positions, tick labels and the 45-degree rotation in one call.
# Calling ax.set_xticklabels() before the tick positions are fixed triggers a
# matplotlib warning, and its label text would be replaced here anyway.
plt.xticks(
    index + width,
    ('Agriculture', 'Industry', 'Services', 'FF Energy', 'Transport',
     'FF Electricity', 'RES'),
    rotation=45,
)
plt.legend()
# Extra bottom margin so the rotated tick labels are not cut off.
fig.subplots_adjust(bottom=0.3)
plt.savefig('prod_Subs.png')
plt.clf()

问题是:如何使用上面构建的数据框 percent 来绘制这样的图?

标签: python pandas dataframe matplotlib

解决方案


推荐阅读