python - 如何获得回归以优化系数和数据数组?Python
问题描述
编辑:31/7/19
我有一个数据集,其中包含 4 个站点的 PGR(牧场增长率)和 Foo(牧场数量)读数,一年中大约 6 周。PGR 和 Foo 之间的关系是反指数的。
我想做的是将几周分成3个批次。PGR 和 Foo 之间具有相似关系的几周将在一起。
组大小不必相同。但是周必须是连续的,即
第一组 - 第 1 周,第 2 周,第 3 周。
第 2 组 - 第 4 周。第 3 组
- 第 5 周,第 6 周。
我想做的是创建 3 个回归来优化以减少平方和,同时优化周选择。
上面的例子表明第 1 - 3 周相似,第 4 周与第 3 周不同,第 5 周和第 6 周彼此相似但与第 4 周不同。(我希望这个分组根据回归自动发生)
下面的代码是我的尝试,但它不起作用(我将其包含在内以帮助更好地解释我正在尝试做的事情)。
data = {'Week':[1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6],
'PGR':[10,29,34.93,32,10,29,34.93,35,31,36,34.93,37,40,46,50,52,40,60,65,68,42,62,65,68],
'Foo': [20,45,102.28,66.79,25,50,90,75,50,75,90,130,50,75,90,130,30,60,105,150,35,60,110,140]}
df = pd.DataFrame(data)
def group(x):
a, b, c, e, f, g, h, i, j, w, z, y = x
#below defines the groups, I want z, y & w to be optimised when this function is solverd
#This determines which weeks are in which groups
group1 = df.loc[(df['Week'] == range(1,z))]
group2 = df.loc[(df['Week'] == range(z,y))]
group3 = df.loc[(df['Week'] == range(y,w))]
#Once the groups are defined this will extract Foo and PGR values for regressions
xm1 = group1['Foo'].to_numpy()
ym1 = group1['PGR'].to_numpy()
xm2 = group2['Foo'].to_numpy()
ym2 = group2['PGR'].to_numpy()
xm3 = group3['Foo'].to_numpy()
ym3 = group3['PGR'].to_numpy()
#These are the 3 regressions
y1 = a + b / xm1 + c * np.log(xm1)
SSE1 = (y1 - ym1)**2
y2 = e + f / xm2 + g * np.log(xm2)
SSE2 = (y2 - ym2) ** 2
y3 = h + i / xm3 + j * np.log(xm3)
SSE3 = (y3 - ym3) ** 2
return SSE1, SSE2, SSE3
#I now have the sum of squares for all the regressions, which I want to minimise
#Minimising can happen by selecting groups that are more similar or by changing the regression coefficients
def objective(x):
return np.sum(group(x))
x0 = np.zeros(12)
# bounds for a, b, c, e, f, g, h, i, j, w, z, y
bndspositive = (0,52)
bnds100 = (-100.0, 100.0)
no_bnds = (-1.0e10, 1.0e10)
bnds = (no_bnds, no_bnds, bnds100, no_bnds, no_bnds, bnds100, no_bnds, no_bnds, bnds100, bndspositive, bndspositive, bndspositive)
# optimise groups and regressions for best fit
solution = minimize(objective, x0, method=None, bounds=bnds)
# solution
x = solution.x
希望这是有道理的,谢谢
解决方案
另一种方法是将数据拟合为 3D 表面。我的方程搜索出现“Foo = a * PGR + b * week^2 + Offset”作为可能的候选方程,参数 a = 2.90940013、b = -2.33138779 和 Offset = -10.04234205 产生 RMSE = 22.02 和 R 平方= 0.6338。这是一个带有您的数据和这个等式的图形 Python 拟合器。
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('Week') # X axis data label
axes.set_ylabel('PGR') # Y axis data label
axes.set_zlabel('Foo') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('Week') # X axis data label
axes.set_ylabel('PGR') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('Week')
axes.set_ylabel('PGR')
axes.set_zlabel('Z Foo')
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def func(data, a, b, Offset):
x = data[0]
y = data[1]
return a*y + b*numpy.square(x) + Offset
if __name__ == "__main__":
week = [1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6]
PGR = [10,29,34.93,32,10,29,34.93,35,31,36,34.93,37,40,46,50,52,40,60,65,68,42,62,65,68]
Foo = [20,45,102.28,66.79,25,50,90,75,50,75,90,130,50,75,90,130,30,60,105,150,35,60,110,140]
xData = numpy.array(week, dtype=numpy.float32)
yData = numpy.array(PGR, dtype=numpy.float32)
zData = numpy.array(Foo, dtype=numpy.float32)
data = [xData, yData, zData]
initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
推荐阅读
- windows - 如何从 Powershell 最大化 Skype
- python - 减少方向码
- c - Fisher 数的结果检查不正确
- fonts - 字体在页面的某些部分正确显示,但在其他部分不正确
- git - 带有本地更改的 Git-svn rebase
- javascript - 反应表集合上的日期自定义过滤器
- java - 发布 Power Manager onPause 的原因是什么?
- verilog - Verilog - 故障定时信号到模块
- javascript - 如何获取div的innerHTML,其中包含一个div?
- javascript - Puppeteer console.log - 如何查看 JSHandle@object 内部?