首页 > 解决方案 > 在 pandas 中使用带有 `apply()` 的 `scipy.stats` 函数时出错

问题描述

我正在尝试计算数据框中每个条目对应的百分位数(以该条目所在列的取值分布为基准)。我确定自己遗漏了一些非常基本的东西,但无法弄清楚为什么运行以下代码时会出错:

from scipy.stats import percentileofscore as pctl
import pandas as pd
import numpy as np

# Sample data: the integers 0..99 arranged as 20 rows x 5 columns.
data = np.arange(100).reshape(20,5)
df = pd.DataFrame(data)

# Intended to return the percentile rank of each entry within its column.
# df.apply(f) calls f once per column, passing that column as a Series.
# NOTE: `r` is the column's whole index rather than a single position, so
# `series.iloc[r]` selects every row and yields a Series. pctl then receives a
# Series as `score`, and its `if np.isnan(score):` check raises
# "The truth value of a Series is ambiguous".
def f(series):
    r= series.index
    return pctl(series.values, series.iloc[r])

# This call raises the ValueError shown in the traceback that follows.
df.apply(f)

这是我得到的错误,

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-7-4d3ad4c6f441> in <module>
----> 1 df.apply(f)

C:\Python\Miniconda\envs\leiap\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
   6012                          args=args,
   6013                          kwds=kwds)
-> 6014         return op.get_result()
   6015 
   6016     def applymap(self, func):

C:\Python\Miniconda\envs\leiap\lib\site-packages\pandas\core\apply.py in get_result(self)
    316                                       *self.args, **self.kwds)
    317 
--> 318         return super(FrameRowApply, self).get_result()
    319 
    320     def apply_broadcast(self):

C:\Python\Miniconda\envs\leiap\lib\site-packages\pandas\core\apply.py in get_result(self)
    140             return self.apply_raw()
    141 
--> 142         return self.apply_standard()
    143 
    144     def apply_empty_result(self):

C:\Python\Miniconda\envs\leiap\lib\site-packages\pandas\core\apply.py in apply_standard(self)
    246 
    247         # compute the result using the series generator
--> 248         self.apply_series_generator()
    249 
    250         # wrap results

C:\Python\Miniconda\envs\leiap\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
    275             try:
    276                 for i, v in enumerate(series_gen):
--> 277                     results[i] = self.f(v)
    278                     keys.append(v.name)
    279             except Exception as e:

<ipython-input-6-347aa35ccd44> in f(series)
      1 def f(series):
      2     r= series.index
----> 3     return pctl(series.values, series.iloc[r])

C:\Python\Miniconda\envs\leiap\lib\site-packages\scipy\stats\stats.py in percentileofscore(a, score, kind)
   1785 
   1786     """
-> 1787     if np.isnan(score):
   1788         return np.nan
   1789     a = np.asarray(a)

C:\Python\Miniconda\envs\leiap\lib\site-packages\pandas\core\generic.py in __nonzero__(self)
   1574         raise ValueError("The truth value of a {0} is ambiguous. "
   1575                          "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
-> 1576                          .format(self.__class__.__name__))
   1577 
   1578     __bool__ = __nonzero__

ValueError: ('The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().', 'occurred at index 0')

标签: python pandas dataframe scipy pandas-apply

解决方案


问题在于 scipy.stats.percentileofscore(a, score, kind='rank') 的参数 score 需要的是单个值,而不是数组。

(我原本发布过一个 NumPy 解决方案,但它并不正确。)这个怎么样:

def f(series):
    """Return the percentile rank of every entry of ``series`` within ``series``.

    ``pctl`` is called with one scalar score at a time, so each value is
    ranked individually against the whole column. The result is produced by
    ``series.apply`` and therefore keeps the index of ``series``.
    """

    def rank_within_column(value):
        # Score a single value against the full column distribution.
        return pctl(series, value)

    return series.apply(rank_within_column)

# Apply f to each column of df; every column comes back as a Series of
# percentile ranks, so the overall result is a DataFrame shaped like df.
df.apply(f)

推荐阅读