python - SpecificationError 的解决方案:在 agg() 和 groupby() 时不支持嵌套重命名器
问题描述
def stack_plot(data, xtick, col2='project_is_approved', col3='total'):
ind = np.arange(data.shape[0])
plt.figure(figsize=(20,5))
p1 = plt.bar(ind, data[col3].values)
p2 = plt.bar(ind, data[col2].values)
plt.ylabel('Projects')
plt.title('Number of projects aproved vs rejected')
plt.xticks(ind, list(data[xtick].values))
plt.legend((p1[0], p2[0]), ('total', 'accepted'))
plt.show()
def univariate_barplots(data, col1, col2='project_is_approved', top=False):
# Count number of zeros in dataframe python: https://stackoverflow.com/a/51540521/4084039
temp = pd.DataFrame(project_data.groupby(col1)[col2].agg(lambda x: x.eq(1).sum())).reset_index()
# Pandas dataframe grouby count: https://stackoverflow.com/a/19385591/4084039
temp['total'] = pd.DataFrame(project_data.groupby(col1)[col2].agg({'total':'count'})).reset_index()['total']
temp['Avg'] = pd.DataFrame(project_data.groupby(col1)[col2].agg({'Avg':'mean'})).reset_index()['Avg']
temp.sort_values(by=['total'],inplace=True, ascending=False)
if top:
temp = temp[0:top]
stack_plot(temp, xtick=col1, col2=col2, col3='total')
print(temp.head(5))
print("="*50)
print(temp.tail(5))
univariate_barplots(project_data, 'school_state', 'project_is_approved', False)
错误:
SpecificationError Traceback (most recent call last)
<ipython-input-21-2cace8f16608> in <module>()
----> 1 univariate_barplots(project_data, 'school_state', 'project_is_approved', False)
<ipython-input-20-856fcc83737b> in univariate_barplots(data, col1, col2, top)
4
5 # Pandas dataframe grouby count: https://stackoverflow.com/a/19385591/4084039
----> 6 temp['total'] = pd.DataFrame(project_data.groupby(col1)[col2].agg({'total':'count'})).reset_index()['total']
7 print (temp['total'].head(2))
8 temp['Avg'] = pd.DataFrame(project_data.groupby(col1)[col2].agg({'Avg':'mean'})).reset_index()['Avg']
~\AppData\Roaming\Python\Python36\site-packages\pandas\core\groupby\generic.py in aggregate(self, func, *args, **kwargs)
251 # but not the class list / tuple itself.
252 func = _maybe_mangle_lambdas(func)
--> 253 ret = self._aggregate_multiple_funcs(func)
254 if relabeling:
255 ret.columns = columns
~\AppData\Roaming\Python\Python36\site-packages\pandas\core\groupby\generic.py in _aggregate_multiple_funcs(self, arg)
292 # GH 15931
293 if isinstance(self._selected_obj, Series):
--> 294 raise SpecificationError("nested renamer is not supported")
295
296 columns = list(arg.keys())
SpecificationError: **nested renamer is not supported**
解决方案
改变
temp['total'] = pd.DataFrame(project_data.groupby(col1)[col2].agg({'total':'count'})).reset_index()['total']
temp['Avg'] = pd.DataFrame(project_data.groupby(col1)[col2].agg({'Avg':'mean'})).reset_index()['Avg']
至
temp['total'] = pd.DataFrame(project_data.groupby(col1)[col2].agg(total='count')).reset_index()['total']
temp['Avg'] = pd.DataFrame(project_data.groupby(col1)[col2].agg(Avg='mean')).reset_index()['Avg']
原因:在新的 pandas 版本中,命名聚合是建议替代已弃用的“dict-of-dicts”方法来命名列特定聚合的输出(重命名时使用字典弃用 groupby.agg())。
来源:https ://pandas.pydata.org/pandas-docs/stable/whatsnew/v0.25.0.html
推荐阅读
- hadoop - Hadoop MapReduce 没有产生所需的输出
- swift - Swift 5 UIScrollView 添加背景图片
- html - 将加载器对齐到屏幕的中心
- git - 安装 git Windows 10 时遇到问题
- ipc - C# 中的 MemoryMappedFiles 用于 Docker 容器之间的 IPC
- php - laravel 雄辩,有多个条件问题
- adobe - After Effects 上的错误代码 72 和 Creative Cloud 桌面应用程序上的 p42
- java - 为类创建比较器有技巧吗?
- python - 使用 pytorch 添加第二个输入 LSTM 时间序列
- perl - 不要停止 perl on:{,} 中的量词大于正则表达式中的 32766