python - Scipy - 如何使用 Python Scipy Curve Fit 拟合这个 beta 分布
问题描述
我正在使用 scipy 的 curve_fit。我有很多分布,有的看起来像 y,有的不像。大多数看起来像 y 的分布都是 beta 分布。我的思路是:如果能对所有具有不同分布的唯一 ID 拟合 beta 函数,就可以从 beta 函数中得到系数,然后找出大小相近的系数,这样就能有效地过滤出所有形状像 y 的分布。
y 看起来像这样(与下面示例代码中的数据相同):
但是,我在开始时遇到了一些麻烦。
y = array([[ 0.50423378, 0.50423378, 0.50423378, 0.50254455, 0.50423378, 0.50254455, 0.50423378, 0.50507627, 0.50507627, 0.50423378,0.50507627, 0.50507627, 0.50423378, 0.50423378, 0.50423378, 0.50423378, 0.50423378, 0.50423378, 0.50254455, 0.50254455, 0.50254455, 0.50423378, 0.50423378, 0.50507627, 0.50507627,0.50507627, 0.50507627, 0.50507627, 0.50423378, 0.50423378, 0.50423378, 0.50507627, 0.50507627, 0.50423378, 0.50507627, 0.50507627, 0.50507627, 0.50423378, 0.50423378, 0.50423378,0.50423378, 0.50423378, 0.50254455, 0.50254455, 0.5, 0.50254455, 0.50254455, 0.50254455, 0.50423378, 0.50423378,0.50423378, 0.50423378, 0.50423378, 0.50254455, 0.50423378, 0.50254455, 0.50254455, 0.50423378, 0.50423378, 0.50254455,0.5 , 0.5 , 0.50254455, 0.50254455, 0.5 ,0.49658699, 0.49228746, 0.49228746, 0.48707792, 0.48092881,0.48707792, 0.48092881, 0.48092881, 0.48092881, 0.48092881,0.48092881, 0.48092881, 0.47380354, 0.47380354, 0.48092881,0.48707792, 0.48707792, 0.48092881, 0.48092881, 0.48092881,0.48092881, 0.48092881, 0.48092881, 0.47380354, 0.48092881,0.48092881, 0.48092881, 0.48707792, 0.48707792, 0.48707792,0.49228746, 0.49228746, 0.49228746, 0.49228746, 0.48707792,0.48707792, 0.48707792, 0.49228746, 0.48707792, 0.48707792,0.48707792, 0.48707792, 0.48707792, 0.49228746, 0.49228746,0.48707792, 0.48707792, 0.49228746, 0.49658699, 0.49658699,0.49658699, 0.49228746, 0.49228746, 0.49658699, 0.49228746,0.49658699, 0.5 , 0.50254455, 0.50423378, 0.50423378,0.50254455, 0.50423378, 0.50423378, 0.50254455, 0.5 ,0.5 , 0.5 , 0.5 , 0.5 , 0.50254455,0.50254455, 0.5 , 0.50254455, 0.5 , 0.5 ,0.5 , 0.5 , 0.5 , 0.5 , 0.49658699,0.49228746, 0.48707792, 0.48707792, 0.48707792, 0.49228746,0.49228746, 0.48707792, 0.48707792, 0.49228746, 0.48707792,0.48707792, 0.48707792, 0.48092881, 0.48092881, 0.48707792,0.48707792, 0.48092881, 0.47380354, 0.48092881, 0.48092881,0.48707792, 0.49228746, 0.48707792, 0.49228746, 0.48707792,0.48092881, 0.47380354, 0.46565731, 0.46565731, 0.46565731,0.45643546, 
0.45643546, 0.45643546, 0.45643546, 0.45643546,0.45643546, 0.45643546, 0.46565731, 0.45643546, 0.45643546,0.45643546, 0.44607129, 0.45643546, 0.45643546, 0.45643546,0.44607129, 0.44607129, 0.43448304, 0.43448304, 0.43448304,0.44607129, 0.45643546, 0.45643546, 0.45643546, 0.46565731,0.47380354, 0.48092881, 0.48092881, 29.38186886, 29.38186886,29.38186886, 29.37898909, 29.45299206, 29.52449116, 29.74083063,29.73771398, 29.73771398, 29.74083063, 29.74083063, 29.74083063,29.74083063, 29.73771398, 29.74083063, 29.73771398, 29.73771398,29.73771398, 29.73771398, 29.74083063, 29.74083063, 29.74083063,30.12527698, 30.48367189, 30.8169243 , 30.8169243 , 30.8169243 ,30.8169243 , 30.82153203, 30.8169243 , 30.81230208, 30.81230208,30.80766536, 30.81230208, 30.81230208, 30.80766536, 30.80301414,30.80301414, 30.80301414, 30.80301414, 30.80301414, 30.80766536,30.81230208, 30.81230208, 30.81230208, 30.81230208, 30.8169243 ,30.82153203, 30.82612528, 10.51949923, 10.51949923, 10.51436497,10.51436497, 10.22456193, 9.91464422, 9.36922158, 9.37416663,9.36922158, 9.36922158, 9.36922158, 9.37416663, 9.37906375,9.383913 , 9.383913 , 9.38871446, 9.383913 , 9.37906375,9.37416663, 9.36922158, 9.36422851, 9.35918734, 7.72711675,5.53121937, 0.5 , 0.50254455, 0.50254455, 0.50254455,0.50254455, 0.50254455, 0.5 , 0.5 , 0.49658699,0.5 , 0.5 , 0.5 , 0.49658699, 0.49658699,0.5 , 0.50254455, 0.50423378, 0.50423378, 0.50423378,0.50507627, 0.50507627, 0.50423378, 0.50423378, 0.50423378,0.50423378, 0.50423378, 0.50254455, 0.50254455, 0.5 ,0.5 , 0.5 , 0.49658699, 0.5 , 0.49658699,0.49658699, 0.49658699, 0.49658699, 0.49658699, 0.49658699,0.49658699, 0.49658699, 0.49228746, 0.48707792, 0.48707792,0.48092881, 0.47380354, 0.47380354, 0.46565731, 0.46565731,0.47380354, 0.46565731, 0.47380354, 0.47380354, 0.47380354, 0.47380354, 0.48092881]])
使用 scipy 中的这个示例,我该如何获取 x 数组并将其代入以得到我的系数,然后把 curve_fit 的拟合结果绘制在我的分布上?
import numpy as np
from scipy.optimize import curve_fit
from scipy.special import gamma as gamma
def betafunc(x, a, b, cst):
    """Return the beta density at *x* multiplied by the scale factor *cst*.

    Evaluates
    ``cst * Gamma(a+b) * x**(a-1) * (1-x)**(b-1) / (Gamma(a) * Gamma(b))``.

    Args:
        x: Point(s) at which to evaluate (scalar or NumPy array).
        a: First shape parameter.
        b: Second shape parameter.
        cst: Multiplicative scale applied to the density.

    Returns:
        The scaled density evaluated at *x*.
    """
    # The gamma ratio is the 1 / B(a, b) normalisation of the beta density.
    return (
        cst * gamma(a + b) * (x ** (a - 1)) * ((1 - x) ** (b - 1))
        / (gamma(a) * gamma(b))
    )
# Sample points; the last pair (x = 1.1, y = 0) is left out of the fit below.
x = np.array([0.1, 0.3, 0.5, 0.7, 0.9, 1.1])
y = np.array([0.45112234, 0.56934313, 0.3996803, 0.28982859, 0.19682153, 0.0])

# Fit (a, b, cst) of betafunc to all but the final point, starting from p0.
popt2, pcov2 = curve_fit(
    betafunc,
    x[:-1],
    y[:-1],
    p0=(0.5, 1.5, 0.5),
)

# Best-fit parameters first, then their covariance matrix, one per line.
print(popt2, pcov2, sep="\n")
解决方案
对于您问题的第一部分: 如果您有一组观测值,可以使用 numpy.histogram 来获取直方图。要获得每个分箱(bin)的中心,请参照下面的代码。您可以将这些值用于拟合过程。但就您提供的数据而言,无法用 betafunc 进行拟合,因为这些数据根本不符合该函数。
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit
from scipy.special import gamma as gamma
def betafunc(x, a, b, cst):
    """Return the beta density at *x* multiplied by the scale factor *cst*.

    Evaluates
    ``cst * Gamma(a+b) * x**(a-1) * (1-x)**(b-1) / (Gamma(a) * Gamma(b))``.

    Args:
        x: Point(s) at which to evaluate (scalar or NumPy array).
        a: First shape parameter.
        b: Second shape parameter.
        cst: Multiplicative scale applied to the density.

    Returns:
        The scaled density evaluated at *x*.
    """
    # The gamma ratio is the 1 / B(a, b) normalisation of the beta density.
    return (
        cst * gamma(a + b) * (x ** (a - 1)) * ((1 - x) ** (b - 1))
        / (gamma(a) * gamma(b))
    )
y_data=np.array([[ 0.50423378, 0.50423378, 0.50423378, 0.50254455, 0.50423378, 0.50254455, 0.50423378, 0.50507627, 0.50507627, 0.50423378,0.50507627, 0.50507627, 0.50423378, 0.50423378, 0.50423378, 0.50423378, 0.50423378, 0.50423378, 0.50254455, 0.50254455, 0.50254455, 0.50423378, 0.50423378, 0.50507627, 0.50507627,0.50507627, 0.50507627, 0.50507627, 0.50423378, 0.50423378, 0.50423378, 0.50507627, 0.50507627, 0.50423378, 0.50507627, 0.50507627, 0.50507627, 0.50423378, 0.50423378, 0.50423378,0.50423378, 0.50423378, 0.50254455, 0.50254455, 0.5, 0.50254455, 0.50254455, 0.50254455, 0.50423378, 0.50423378,0.50423378, 0.50423378, 0.50423378, 0.50254455, 0.50423378, 0.50254455, 0.50254455, 0.50423378, 0.50423378, 0.50254455,0.5 , 0.5 , 0.50254455, 0.50254455, 0.5 ,0.49658699, 0.49228746, 0.49228746, 0.48707792, 0.48092881,0.48707792, 0.48092881, 0.48092881, 0.48092881, 0.48092881,0.48092881, 0.48092881, 0.47380354, 0.47380354, 0.48092881,0.48707792, 0.48707792, 0.48092881, 0.48092881, 0.48092881,0.48092881, 0.48092881, 0.48092881, 0.47380354, 0.48092881,0.48092881, 0.48092881, 0.48707792, 0.48707792, 0.48707792,0.49228746, 0.49228746, 0.49228746, 0.49228746, 0.48707792,0.48707792, 0.48707792, 0.49228746, 0.48707792, 0.48707792,0.48707792, 0.48707792, 0.48707792, 0.49228746, 0.49228746,0.48707792, 0.48707792, 0.49228746, 0.49658699, 0.49658699,0.49658699, 0.49228746, 0.49228746, 0.49658699, 0.49228746,0.49658699, 0.5 , 0.50254455, 0.50423378, 0.50423378,0.50254455, 0.50423378, 0.50423378, 0.50254455, 0.5 ,0.5 , 0.5 , 0.5 , 0.5 , 0.50254455,0.50254455, 0.5 , 0.50254455, 0.5 , 0.5 ,0.5 , 0.5 , 0.5 , 0.5 , 0.49658699,0.49228746, 0.48707792, 0.48707792, 0.48707792, 0.49228746,0.49228746, 0.48707792, 0.48707792, 0.49228746, 0.48707792,0.48707792, 0.48707792, 0.48092881, 0.48092881, 0.48707792,0.48707792, 0.48092881, 0.47380354, 0.48092881, 0.48092881,0.48707792, 0.49228746, 0.48707792, 0.49228746, 0.48707792,0.48092881, 0.47380354, 0.46565731, 0.46565731, 0.46565731,0.45643546, 
0.45643546, 0.45643546, 0.45643546, 0.45643546,0.45643546, 0.45643546, 0.46565731, 0.45643546, 0.45643546,0.45643546, 0.44607129, 0.45643546, 0.45643546, 0.45643546,0.44607129, 0.44607129, 0.43448304, 0.43448304, 0.43448304,0.44607129, 0.45643546, 0.45643546, 0.45643546, 0.46565731,0.47380354, 0.48092881, 0.48092881, 29.38186886, 29.38186886,29.38186886, 29.37898909, 29.45299206, 29.52449116, 29.74083063,29.73771398, 29.73771398, 29.74083063, 29.74083063, 29.74083063,29.74083063, 29.73771398, 29.74083063, 29.73771398, 29.73771398,29.73771398, 29.73771398, 29.74083063, 29.74083063, 29.74083063,30.12527698, 30.48367189, 30.8169243 , 30.8169243 , 30.8169243 ,30.8169243 , 30.82153203, 30.8169243 , 30.81230208, 30.81230208,30.80766536, 30.81230208, 30.81230208, 30.80766536, 30.80301414,30.80301414, 30.80301414, 30.80301414, 30.80301414, 30.80766536,30.81230208, 30.81230208, 30.81230208, 30.81230208, 30.8169243 ,30.82153203, 30.82612528, 10.51949923, 10.51949923, 10.51436497,10.51436497, 10.22456193, 9.91464422, 9.36922158, 9.37416663,9.36922158, 9.36922158, 9.36922158, 9.37416663, 9.37906375,9.383913 , 9.383913 , 9.38871446, 9.383913 , 9.37906375,9.37416663, 9.36922158, 9.36422851, 9.35918734, 7.72711675,5.53121937, 0.5 , 0.50254455, 0.50254455, 0.50254455,0.50254455, 0.50254455, 0.5 , 0.5 , 0.49658699,0.5 , 0.5 , 0.5 , 0.49658699, 0.49658699,0.5 , 0.50254455, 0.50423378, 0.50423378, 0.50423378,0.50507627, 0.50507627, 0.50423378, 0.50423378, 0.50423378,0.50423378, 0.50423378, 0.50254455, 0.50254455, 0.5 ,0.5 , 0.5 , 0.49658699, 0.5 , 0.49658699,0.49658699, 0.49658699, 0.49658699, 0.49658699, 0.49658699,0.49658699, 0.49658699, 0.49228746, 0.48707792, 0.48707792,0.48092881, 0.47380354, 0.47380354, 0.46565731, 0.46565731,0.47380354, 0.46565731, 0.47380354, 0.47380354, 0.47380354, 0.47380354, 0.48092881]])
# Bin the observations; np.histogram returns (counts, bin_edges).
hist = np.histogram(y_data[0], bins=20)

# Counts per bin, and the centre of each bin (midpoint of its two edges).
y = hist[0]
x = 0.5 * (hist[1][:-1] + hist[1][1:])
print(x, y)

# Compare the manually computed bin centres with matplotlib's own histogram.
plt.step(x, y, label='Manual calculation of the center of the bins')
plt.hist(y_data[0], bins=20, histtype='bar', label='Automatic plot with plt.hist')
plt.legend()
plt.show()

# Attempt the betafunc fit on the bin centres and counts (last bin excluded).
popt2, pcov2 = curve_fit(
    betafunc,
    x[:-1],
    y[:-1],
    p0=(0.5, 1.5, 0.5),
)
对于您问题的第二部分: 要使用最佳拟合参数绘制函数,只需加上我在代码末尾添加的最后四行。
import numpy as np
from scipy.optimize import curve_fit
from scipy.special import gamma as gamma
def betafunc(x, a, b, cst):
    """Return the beta density at *x* multiplied by the scale factor *cst*.

    Evaluates
    ``cst * Gamma(a+b) * x**(a-1) * (1-x)**(b-1) / (Gamma(a) * Gamma(b))``.

    Args:
        x: Point(s) at which to evaluate (scalar or NumPy array).
        a: First shape parameter.
        b: Second shape parameter.
        cst: Multiplicative scale applied to the density.

    Returns:
        The scaled density evaluated at *x*.
    """
    # The gamma ratio is the 1 / B(a, b) normalisation of the beta density.
    return (
        cst * gamma(a + b) * (x ** (a - 1)) * ((1 - x) ** (b - 1))
        / (gamma(a) * gamma(b))
    )
# Sample points; the last pair (x = 1.1, y = 0) is left out of the fit below.
x = np.array([0.1, 0.3, 0.5, 0.7, 0.9, 1.1])
y = np.array([0.45112234, 0.56934313, 0.3996803, 0.28982859, 0.19682153, 0.0])

# Fit (a, b, cst) of betafunc to all but the final point, starting from p0.
popt2, pcov2 = curve_fit(
    betafunc,
    x[:-1],
    y[:-1],
    p0=(0.5, 1.5, 0.5),
)

# Best-fit parameters first, then their covariance matrix, one per line.
print(popt2, pcov2, sep="\n")
from matplotlib import pyplot as plt
# Evaluate the fitted curve only on the points used for the fit: x[-1] = 1.1
# lies outside [0, 1], where (1 - x) ** (b - 1) is NaN for non-integer b and
# NumPy emits a RuntimeWarning.
plt.plot(x[:-1], betafunc(x[:-1], *popt2))
# Original data, including the excluded last point, for comparison.
plt.plot(x, y)
plt.show()
推荐阅读
- c++ - 您可以在不使用任何包含的情况下在 C++ 中输出数据吗?
- terraform - google_sql_database_instance 使用私有 IP 创建多个实例时等待创建实例时出错
- json - 奇怪的 KeyError (Python)
- css - 媒体使用开发工具查询移动/桌面的不同可视化
- javascript - Django - 我的静态文件放错地方了吗?
- c++ - 在 Win32 C++ 桌面应用程序中读取进程的“stdout”输出
- pine-script - 如何在松木上制作更薄的新线
- python - 使用日志分析进行数据分类
- php - 使用php邮件功能发送电子邮件
- windows - LINK : 构建 pointnet++ 模块时出现致命错误 LNK1181