python - pandas 中的错误:"Buffer has wrong number of dimensions (expected 1, got 2)"(缓冲区的维数错误:预期为 1,得到 2)
问题描述
我看过几个帖子都有同样的错误,但很抱歉,我仍然无法正确解决我的问题。
这是我的 .py 文件中的代码:
# Count the rows of `data` in which every selected column is truthy.
# `list_of_items` is handed to `.iloc` as the column indexer, so it has to be a
# flat (1-D) list of integer column positions. A nested list such as [[0]] is a
# 2-D indexer and is what produces
# "ValueError: Buffer has wrong number of dimensions (expected 1, got 2)".
def get_sum(self, data, list_of_items):
# I coded this return line, which worked according to one of the cells of the .ipynb file
# Returns an integer summing up all of the 1s in a given column (or set of columns)
return data.iloc[:, list_of_items].all(axis = 'columns').sum()
# Collect the entries of `basket` whose get_sum() count is at least 5.
def get_list(self, data):
# Each element of `basket` is already a one-element list wrapping a column label.
basket = [[i] for i in data.columns]
product_list = []
for item in basket:
# I coded these two lines, which I am unable to test due to the error
# NOTE(review): `item` is already a list, so `[item]` is a nested list; get_sum
# forwards it to `.iloc`, which is where the ValueError in the traceback is raised.
if self.get_sum(data, [item]) >= 5:
# NOTE(review): this appends the one-element list, not the bare label, so the
# result would look like [[1]] rather than the expected [1].
product_list.append(item)
return product_list
此单元格将在 .ipynb 中访问它:
basket_list = groceries.get_list(df)
print(basket_list)
# expected to be something like [0, 2]. In the case of the given sample, [1]
错误出现在 .ipynb 单元格的第一行,但这不可能,因为我只更改了 .py 文件(提供了两个文件)。话虽这么说,我不知道错误到底在哪里,因为它没有指向我写的任何东西。如果有人至少可以提示我做错了什么,那将不胜感激。
编辑:这是数据示例:
0 1 2
0 0 0 1
1 1 1 1
2 1 1 0
3 0 1 0
4 1 1 1
5 1 1 0
这是完整的错误回溯,
ValueError Traceback (most recent call last)
<ipython-input-142-6c2c13d12cb0> in <module>()
----> 1 frequent_itemsets = rule_miner.get_frequent_itemsets(syn_df)
2 print(frequent_itemsets)
/content/rule_miner.py in get_frequent_itemsets(self, data)
121 # this class.
122
--> 123 if self.get_support(data, [itemset]) >= self.support_t:
124 new_itemsets.append(itemset)
125
/content/rule_miner.py in get_support(self, data, itemset)
28 # function.
29
---> 30 return data.iloc[:, itemset].all(axis = 'columns').sum()
31
32 def merge_itemsets(self, itemsets):
/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in __getitem__(self, key)
871 # AttributeError for IntervalTree get_value
872 pass
--> 873 return self._getitem_tuple(key)
874 else:
875 # we by definition only have the 0th axis
/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
1447 pass
1448
-> 1449 return self._getitem_tuple_same_dim(tup)
1450
1451 def _get_list_axis(self, key, axis: int):
/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _getitem_tuple_same_dim(self, tup)
748 continue
749
--> 750 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
751 # We should never have retval.ndim < self.ndim, as that should
752 # be handled by the _getitem_lowerdim call above.
/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
1485 # a list of integers
1486 elif is_list_like_indexer(key):
-> 1487 return self._get_list_axis(key, axis=axis)
1488
1489 # a single integer
/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _get_list_axis(self, key, axis)
1467 """
1468 try:
-> 1469 return self.obj._take_with_is_copy(key, axis=axis)
1470 except IndexError as err:
1471 # re-raise with different error message
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in _take_with_is_copy(self, indices, axis)
3361 See the docstring of `take` for full explanation of the parameters.
3362 """
-> 3363 result = self.take(indices=indices, axis=axis)
3364 # Maybe set copy if we didn't actually change the index.
3365 if not result._get_axis(axis).equals(self._get_axis(axis)):
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py in take(self, indices, axis, is_copy, **kwargs)
3349
3350 new_data = self._mgr.take(
-> 3351 indices, axis=self._get_block_manager_axis(axis), verify=True
3352 )
3353 return self._constructor(new_data).__finalize__(self, method="take")
/usr/local/lib/python3.7/dist-packages/pandas/core/internals/managers.py in take(self, indexer, axis, verify, convert)
1455 new_labels = self.axes[axis].take(indexer)
1456 return self.reindex_indexer(
-> 1457 new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True
1458 )
1459
/usr/local/lib/python3.7/dist-packages/pandas/core/internals/managers.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy, consolidate)
1289
1290 if axis == 0:
-> 1291 new_blocks = self._slice_take_blocks_ax0(indexer, fill_value=fill_value)
1292 else:
1293 new_blocks = [
/usr/local/lib/python3.7/dist-packages/pandas/core/internals/managers.py in _slice_take_blocks_ax0(self, slice_or_indexer, fill_value, only_slice)
1369 else:
1370 blknos = algos.take_1d(
-> 1371 self.blknos, slobj, fill_value=-1, allow_fill=allow_fill
1372 )
1373 blklocs = algos.take_1d(
/usr/local/lib/python3.7/dist-packages/pandas/core/algorithms.py in take_nd(arr, indexer, axis, out, fill_value, allow_fill)
1735 arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info
1736 )
-> 1737 func(arr, indexer, out, fill_value)
1738
1739 if flip_order:
pandas/_libs/algos_take_helper.pxi in pandas._libs.algos.take_1d_int64_int64()
ValueError: Buffer has wrong number of dimensions (expected 1, got 2)
解决方案
不需要编写复杂的自定义函数,您可以直接使用 pandas 的向量化函数:
获取至少有五个 1 的列的名称:
>>> df.columns[df.eq(1).sum().ge(5)]
Index(['1'], dtype='object')
过滤这些列:
df.loc[:, df.eq(1).sum().ge(5)]
输出:
1
0 0
1 1
2 1
3 1
4 1
5 1
工作原理:
(df.eq(1) # values equal to 1 -> True
.sum() # count number of True
.ge(5) # True if sum ≥ 5
)
OP代码修复:
- 使用 loc 代替 iloc(按列标签而不是按整数位置选择列)
- 修正了传递给 get_sum 的错误列表(原代码传入的是嵌套列表)
def get_sum(data, list_of_items):
    """Count the rows of *data* in which every selected column is truthy.

    Args:
        data: pandas DataFrame of 0/1 (or boolean) flags.
        list_of_items: flat list of column labels to select, e.g. ``["1"]``
            or ``["0", "1"]``. It must not be nested: a list of lists is a
            2-D indexer and raises "Buffer has wrong number of dimensions".

    Returns:
        The number of rows whose values are truthy in all selected columns.
    """
    # .loc selects by label, so names taken from data.columns can be passed
    # through unchanged (.iloc would require integer positions instead).
    return data.loc[:, list_of_items].all(axis="columns").sum()


def get_list(data, min_count=5):
    """Return the labels of the columns of *data* with enough truthy rows.

    Args:
        data: pandas DataFrame of 0/1 (or boolean) flags.
        min_count: minimum number of truthy rows a column needs in order to
            be kept. Defaults to 5, the threshold used in the original code.

    Returns:
        A list of column labels, in column order.
    """
    # Iterate over the columns of the argument, not of a global ``df``, so the
    # function works for whichever frame the caller passes in.
    return [
        item for item in data.columns
        if get_sum(data, [item]) >= min_count
    ]
推荐阅读
- reactjs - 使用 formik 的自动完成材料 UI 组件正在加载错误和警告
- json - Last.fm api json 没有被快速解码器读取
- javascript - csvtojson如何添加行号
- pdf-generation - 生成 pdf 时不应用 toc 标题的 CSS 宽度
- reactjs - React Hooks:如何根据另一个状态变量更新来更新状态变量
- c++ - 构建 k-mers 集合的重叠图
- ios - 当我们在项目中添加多个storyboard时,什么时候需要指定和声明storyboard变量
- deep-learning - TypeError:预期大小为浮点数,得到
- java - 如何修复Android编程中的内容不允许主目录空
- python - 如何模拟 HTML <-> FLASK 与 cURL 的交互 - POST 请求