python - Python:将包含开始日期和结束日期的数据框展开为单个日期字段
问题描述
我有一个这样的数据框 *EDITED
StartDate EndDate Company Location
2019-01-15 2019-01-31 1.0 121.0
2019-02-01 2020-03-10 1.0 136.0
2006-10-02 2020-03-10 2.0 136.0
2003-07-31 2020-03-10 2.0 321.0
2010-11-03 2020-03-10 3.0 322.0
2013-02-01 2017-02-07 4.0 375.0
2017-02-08 2019-01-14 4.0 375.0
2019-01-15 2019-04-29 4.0 375.0
2019-04-30 2020-03-10 4.0 375.0
参考这个问题:《Pandas:将日期范围展开为单个日期》
我希望把开始日期和结束日期展开成一个日期字段(每天一行)。我按照该问题的解决方案中的步骤进行了操作。但是,当我在分组后使用 resample 时,收到了这个错误:ValueError: cannot reindex a non-unique index with a method or limit
发生这种情况的原因是什么?
更清楚地说,这是我的代码(原始数据帧的索引只是普通索引 1、2、3,...):
# Load the raw company/location table from a parquet file.
# NOTE(review): `read_parquet` is called unqualified while the rest of the
# script uses the `pd.` prefix; presumably it is imported separately — confirm.
df=read_parquet('company_location.parquet')
# Keep only the four columns used by the decomposition.
df=df[['COMPANY','STARTDATE','ENDDATE','LOCATION']]
# Parse both range endpoints as datetimes.
df['STARTDATE']=pd.to_datetime(df['STARTDATE'])
df['ENDDATE']=pd.to_datetime(df['ENDDATE'])
# Drop every row that has a missing value in any of the kept columns.
df=df.dropna(axis=0,how='any')
# Sequential number identifying each remaining date range.
df['rows']=range(len(df))
# Build one copy of the table per endpoint, renaming the endpoint column to DATE.
starts=df[['COMPANY','STARTDATE','LOCATION','rows']].rename(columns={'STARTDATE':'DATE'})
ends=df[['COMPANY','ENDDATE','LOCATION','rows']].rename(columns={'ENDDATE':'DATE'})
# Stack the two copies, so each original range contributes a start row and an end row.
df_decomp=pd.concat([starts,ends])
# Append `rows` as an extra index level after the existing index.
df_decomp=df_decomp.set_index('rows', append=True)
# NOTE: sort_index() returns a new frame; the result is not assigned here,
# so df_decomp itself is left unchanged by this line.
df_decomp.sort_index()
一切都很好,直到这里。
然后当我写这行时,有一个错误:
# For each group formed by the two index levels, index the group by DATE and
# upsample it to daily frequency, filling new days with the 'pad' method.
# NOTE(review): the traceback reports a reindex on a non-unique index, which
# points to a group holding the same DATE more than once (for example a range
# whose STARTDATE equals its ENDDATE, or a non-unique original index) — confirm
# against the data.
df_decomp=df_decomp.groupby(level=[0,1]).apply(lambda x: x.set_index('DATE').resample('D').fillna(method='pad'))
错误是:(这是 Jupyter Notebook)
ValueError Traceback (most recent call last)
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
688 try:
--> 689 result = self._python_apply_general(f)
690 except Exception:
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f)
706 keys, values, mutated = self.grouper.apply(f, self._selected_obj,
--> 707 self.axis)
708
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/ops.py in apply(self, f, data, axis)
189 group_axes = _get_axes(group)
--> 190 res = f(group)
191 if not _is_indexed_like(res, group_axes):
<ipython-input-29-e5d0ce53cd1c> in <lambda>(x)
----> 1 rep_movement_decomp=rep_movement_decomp.groupby(level=[0,1]).apply(lambda x: x.set_index('DATE').resample('D').fillna(method='pad'))
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/resample.py in fillna(self, method, limit)
759 """
--> 760 return self._upsample(method, limit=limit)
761
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/resample.py in _upsample(self, method, limit, fill_value)
1072 result = obj.reindex(res_index, method=method,
-> 1073 limit=limit, fill_value=fill_value)
1074
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
196 def wrapper(*args, **kwargs):
--> 197 return func(*args, **kwargs)
198
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
3808 kwargs.pop('labels', None)
-> 3809 return super(DataFrame, self).reindex(**kwargs)
3810
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4355 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 4356 fill_value, copy).__finalize__(self)
4357
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3740 frame = frame._reindex_index(index, method, copy, level,
-> 3741 fill_value, limit, tolerance)
3742
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
3748 level=level, limit=limit,
-> 3749 tolerance=tolerance)
3750 return self._reindex_with_indexers({0: [new_index, indexer]},
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in reindex(self, target, method, level, limit, tolerance)
3137 if method is not None or limit is not None:
-> 3138 raise ValueError("cannot reindex a non-unique index "
3139 "with a method or limit")
ValueError: cannot reindex a non-unique index with a method or limit
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-29-e5d0ce53cd1c> in <module>()
----> 1 rep_movement_decomp=rep_movement_decomp.groupby(level=[0,1]).apply(lambda x: x.set_index('DATE').resample('D').fillna(method='pad'))
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
699
700 with _group_selection_context(self):
--> 701 return self._python_apply_general(f)
702
703 return result
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f)
705 def _python_apply_general(self, f):
706 keys, values, mutated = self.grouper.apply(f, self._selected_obj,
--> 707 self.axis)
708
709 return self._wrap_applied_output(
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/ops.py in apply(self, f, data, axis)
188 # group might be modified
189 group_axes = _get_axes(group)
--> 190 res = f(group)
191 if not _is_indexed_like(res, group_axes):
192 mutated = True
<ipython-input-29-e5d0ce53cd1c> in <lambda>(x)
----> 1 rep_movement_decomp=rep_movement_decomp.groupby(level=[0,1]).apply(lambda x: x.set_index('DATE').resample('D').fillna(method='pad'))
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/resample.py in fillna(self, method, limit)
758 2018-01-01 02:00:00 6.0 5
759 """
--> 760 return self._upsample(method, limit=limit)
761
762 @Appender(_shared_docs['interpolate'] % _shared_docs_kwargs)
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/resample.py in _upsample(self, method, limit, fill_value)
1071 else:
1072 result = obj.reindex(res_index, method=method,
-> 1073 limit=limit, fill_value=fill_value)
1074
1075 result = self._apply_loffset(result)
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
195 @wraps(func)
196 def wrapper(*args, **kwargs):
--> 197 return func(*args, **kwargs)
198
199 if not PY2:
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
3807 kwargs.pop('axis', None)
3808 kwargs.pop('labels', None)
-> 3809 return super(DataFrame, self).reindex(**kwargs)
3810
3811 @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4354 # perform the reindex on the axes
4355 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 4356 fill_value, copy).__finalize__(self)
4357
4358 def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3739 if index is not None:
3740 frame = frame._reindex_index(index, method, copy, level,
-> 3741 fill_value, limit, tolerance)
3742
3743 return frame
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
3747 new_index, indexer = self.index.reindex(new_index, method=method,
3748 level=level, limit=limit,
-> 3749 tolerance=tolerance)
3750 return self._reindex_with_indexers({0: [new_index, indexer]},
3751 copy=copy, fill_value=fill_value,
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in reindex(self, target, method, level, limit, tolerance)
3136 else:
3137 if method is not None or limit is not None:
-> 3138 raise ValueError("cannot reindex a non-unique index "
3139 "with a method or limit")
3140 indexer, missing = self.get_indexer_non_unique(target)
ValueError: cannot reindex a non-unique index with a method or limit
解决方案
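我按照《Pandas:将日期范围展开为单个日期》中的做法操作,在您给出的示例数据上没有出现任何错误,请参见下面的代码。至于报错的原因:resample 在向前填充时需要对日期索引做 reindex,而这要求每个分组内的日期索引是唯一的。如果某个分组里出现了重复的日期(例如某一行的开始日期与结束日期相同,或者原始索引本身不唯一,导致多行落入同一个分组),就会抛出 "cannot reindex a non-unique index with a method or limit"。建议先检查真实数据中是否存在这种情况;在 resample 之前去掉分组内的重复日期即可避免该错误。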
import pandas as pd


def expand_to_daily(group):
    """Expand the start/end rows of one date range into one row per day.

    Parameters:
        group: DataFrame holding the rows of a single (Company, row) group,
            with a ``Date`` column and the value columns to carry forward.

    Returns:
        DataFrame indexed by ``Date`` at daily frequency, with the value
        columns forward-filled from the start of the range to its end.
    """
    return (
        # A range whose start equals its end yields the same Date twice;
        # resampling with a fill method needs a unique index, so keep one.
        group.drop_duplicates(subset='Date')
        .set_index('Date')
        # Forward-filling reindexes against the dates, which needs them in order.
        .sort_index()
        .resample('D')
        .ffill()
    )


# Sample data: one row per (Company, Location) assignment with its date range.
df = pd.DataFrame([['2019-01-15','2019-01-31','A',121.0],
['2019-02-01','2020-03-10','A',136.0],
['2006-10-02','2020-03-10','B',136.0],
['2003-07-31','2020-03-10','B',321.0],
['2010-11-03','2020-03-10','C',322.0],
['2013-02-01','2017-02-07','D',375.0],
['2017-02-08','2019-01-14','D',375.0],
['2019-01-15','2019-04-29','D',375.0],
['2019-04-30','2020-03-10','D',375.0]],
columns=['StartDate','EndDate','Company','Location'])
df['StartDate'] = pd.to_datetime(df['StartDate'])
df['EndDate'] = pd.to_datetime(df['EndDate'])
df = df.set_index('Company')
# Sequential number that keeps the ranges of one company apart.
df['row'] = range(len(df))
print(df)

# One copy of the table per endpoint, with the endpoint renamed to Date.
starts = df[['StartDate', 'Location', 'row']].rename(columns={'StartDate': 'Date'})
ends = df[['EndDate', 'Location', 'row']].rename(columns={'EndDate': 'Date'})
df_decomp = pd.concat([starts, ends])
df_decomp = df_decomp.set_index('row', append=True)
# sort_index() returns a new frame, so the result has to be assigned.
df_decomp = df_decomp.sort_index()
print(df_decomp)

# Expand every (Company, row) range to daily rows, then drop the helper
# `row` level so the result is indexed by (Company, Date).
df_decomp = df_decomp.groupby(level=[0, 1]).apply(expand_to_daily)
df_decomp = df_decomp.reset_index(level=1, drop=True)
print(df_decomp.loc['D'])
推荐阅读
- swift - 如何断言结构符合协议?
- python - 如何使用 BrowserStack Appium 和 Python 创建移动自动化应用程序测试?
- python-3.x - 获取在消息上添加表情符号的用户列表
- scheduling - 活动运行的 iCalendar 重复规则:每两周,周一和周三,活动一天内的每 30 分钟?
- php - PHP - 从连接数组中删除键
- reactjs - 以用户身份从前端下载图像(我有链接)?
- go - 当 NextDecoder 依赖于上一层而不是当前层时,如何正确编写自定义层?
- javascript - 新的 div CSS 不采用“top”和“left”类型
- kubernetes - kustomize configmap 命令:它有什么作用?
- codenameone - 如何分析应用商店崩溃报告