python - Python外连接
问题描述
下面的代码用于计算统计值。
import re
from pathlib import Path
import pandas as pd
def prepare_values(df):
df_columns = ['frame.time_delta_displayed', 'frame.len']
df_values = []
for col in df_columns:
df_values +=[
df[col].max(),
df[col].min(),
df[col].std(),
df[col].quantile(0.25),
df[col].quantile(0.5),
df[col].quantile(0.75),
df[col].mean(),
df[col].mad(),
df[col].var(),
df[col].skew(),
df[col].kurtosis(),
df[col].sum(),
]
return df_values
source_dir = Path('/media/root/HASARA/Snipping Experiment-App Activities/Time-0.5/InOutFiltered')
in_data = []
for file in source_dir.glob('**/*.in.csv'):
activity = {'activity': file.stem.split('.')[0]}
df = pd.read_csv(file)
cols =['maxTimeIn', 'minTimeIn', 'stdTimeIn', 'q1TimeIn', 'q2TimeIn', 'q3TimeIn', 'meanTimeIn', 'madTimeIn', 'varianceTimeIn', 'skewTimeIn', 'kurtosisTimeIn', 'sumTimeIn', 'maxLenIn', 'minLenIn', 'stdLenIn', 'q1LenIn','q2lenIn', 'q3LenIn', 'meanLenIn', 'madLenIn', 'varianceLenIn', 'skewLenIn', 'kurtosisLenIn', 'sumLenIn']
values = prepare_values(df)
file_data ={**activity, **dict(zip(cols,values))}
in_data.append(file_data)
out_data =[]
for file in source_dir.glob('**/*.out.csv'):
activity = {'activity': file.stem.split('.')[0]}
df = pd.read_csv(file)
cols =['maxTimeOut', 'minTimeOut', 'stdTimeOut', 'q1TimeOut', 'q2TimeOut', 'q3TimeOut', 'meanTimeOut', 'madTimeOut', 'varianceTimeOut', 'skewTimeOut', 'kurtosisTimeOut', 'sumTimeOut', 'maxLenOut', 'minLenOut', 'stdLenOut', 'q1LenOut', 'q2LenOut', 'q3LenOut', 'meanLenOut', 'madLenOut', 'varianceLenOut', 'skewLenOut', 'kurtosisLenOut','sumLenOut']
values=prepare_values(df)
file_data = {**activity, **dict(zip(cols, values))}
out_data.append(file_data)
in_df = pd.DataFrame(in_data)
out_df = pd.DataFrame(out_data)
all_df = in_df.join(out_df.set_index('activity'), on='activity', how='outer')
all_df.dropna(subset=all_df.columns.tolist()[1:], how='all', inplace=True)
all_df.fillna(0, inplace=True)
all_df['activity'] = all_df['activity'].apply(lambda x:re.sub(r'^([a-zA-Z]+).*', r'\1',x))
all_df.to_csv('/media/root/HASARA/Snipping Experiment-App Activities/Time-0.5/AllDataNew.csv', index=False)
我收到一个错误。想不通是什么意思。
Traceback (most recent call last):
File "/root/PycharmProjects/AppAct/StatisticCal.py", line 48, in <module>
all_df= in_df.join(out_df.set_index('activity'), on='activity', how='outer')
File "/root/PycharmProjects/AppAct/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 4178, in set_index
level = frame[col]._values
File "/root/PycharmProjects/AppAct/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 2927, in __getitem__
indexer = self.columns.get_loc(key)
File "/root/PycharmProjects/AppAct/venv/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 2659, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'activity'
解决方案
推荐阅读
- reactjs - 我可以实例化一个类并调用该方法吗
- .net - “端点无法访问。刷新缓存并重试”Azure Cosmos DB SQL API、.NET SDK
- python - 如何防止 tkinter 框架在一组框架中调整大小
- javascript - 在选择框中动态生成选项
- java - 图像在设备上消失但在 android studio 预览中显示
- oracle - 运行 dbms_stats.gather_table_stats 时统计信息是否可用
- maven - 在 Eclipse Luna 中创建一个 Maven 项目抱怨“无法解析原型”
- java - Java - 将响应时间从 Android Studio 传递到“phpmyadmin”
- javascript - 填充反应组件导致“TypeError:无法读取未定义的属性'map'”
- android - 如何在使用 android studio 的音频播放器中保存音频播放器的当前播放时间,即 1:45/25:40?