首页 > 解决方案 > 在gekko非线性回归中处理多维数组的正确方法是什么?

问题描述

尝试使用 python 的 gekko 库进行非线性回归。

样本取自这里 http://apmonitor.com/wiki/index.php/Main/GekkoPythonOptimization

就我而言,我需要多维回归。所以我试着做一些修改。这是结果。

import pandas
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt

# NOTE: this is the failing version from the question; running it ends in the
# "Data arrays must have the same length ..." exception quoted after the listing.

# Measurements: 31 rows, each holding three input values.
xm = np.array([[80435, 33576, 3930495], [63320, 21365, 2515052],
[131294, 46680, 10339497], [64470, 29271, 3272846],
[23966, 7973, 3450144], [19863, 11429, 3427307],
[32139, 13114, 2462822], [78976, 26973, 5619715],
[32857, 10455, 3192817], [29400, 12808, 3665615],
[4667, 2876, 2556650], [21477, 10349, 6005812],
[9168, 4617, 2878631], [385112, 127609, 4063576],
[55522, 29954, 3632023], [155, 197, 507],
[160, 106, 336], [25, 23, 669], [86, 96, 751], [199, 235, 515],
[60, 83, 511], [8, 25, 187], [32, 59, 679], [11, 22, 365],
[322, 244, 2001], [172, 229, 1110], [41, 48, 447], [109, 144, 2386],
[23, 27, 319], [105, 204, 672], [77, 77, 2]])

# Measured output, one value per row of xm (31 values).
ym = np.array([90,85,91,90,90,82,81,85,83,83,72,78,
74,92,90,28,26,13,12,22,25,5,10,15,50,54,4,28,10,7,6])


# GEKKO model
m = GEKKO()

# parameters
# The whole 2-D xm array is handed to a single Param here.
# NOTE(review): presumably this is what the length check in the quoted
# exception objects to (a 31x3 array next to the 31-element ym) - confirm.
x = m.Param(value=xm, name='X')
# y carries the measured outputs as a controlled variable.
y = m.CV(value=ym)
y.FSTATUS = 1

# Three regression coefficients, one per input column; STATUS is switched on
# for each of them.
a1 = m.FV()
a1.STATUS=1

a2 = m.FV()
a2.STATUS=1

a3 = m.FV()
a3.STATUS=1

# regression equation
# One equation is added per loop iteration, indexing into x and y, and the
# logarithm is taken with NumPy's log10 rather than a GEKKO function.
for i in range(len(x)):
    m.Equation(
        y[i] == np.log10(x[i][0]) * a1 +
                np.log10(x[i][1]) * a2 +
                np.log10(x[i][2]) * a3)

# regression mode
m.options.IMODE = 2

# optimize
m.solve(disp=False, GUI=False)

# print parameters
# str() is applied to the FV objects themselves, not to their solved values.
print('Optimized, a = ', str(a1), str(a2), str(a3))

# Scatter of model output (x-axis) against measured output (y-axis).
plt.plot(y.value, ym, 'bo')
# plt.plot(xm, y.value, 'r-')
plt.show()

结果我得到错误

File "/usr/local/lib/python3.6/dist-packages/gekko/gekko.py", line 1830, in solve
self._write_csv()
File "/usr/local/lib/python3.6/dist-packages/gekko/gk_write_files.py", line 184, in _write_csv
raise Exception('Data arrays must have the same length, and match time discretization in dynamic problems')
Exception: Data arrays must have the same length, and match time discretization in dynamic problems

标签: python, multidimensional-array, non-linear-regression, gekko

解决方案


以下是修改的摘要:

  • 使用m.log10代替np.log10
  • 将x定义为一个数组,并把xm的每一列分别加载到对应元素的value中(例如把xm[:,0]加载到x[0].value)。
  • 只定义一次方程,而不是为每个数据行重复定义。对于大型数据集,IMODE=2 的这种用法很高效,因为方程只需定义一次,所有数据点都使用同一个表达式进行求值。
  • 在绘图中添加了红线
  • 打印a[i].value[0]以显示数值解

回归结果

    import pandas
    from gekko import GEKKO
    import numpy as np
    import matplotlib.pyplot as plt

    # Measurements: 31 rows, each holding three input values.
    xm = np.array([[80435, 33576, 3930495], [63320, 21365, 2515052],
    [131294, 46680, 10339497], [64470, 29271, 3272846],
    [23966, 7973, 3450144], [19863, 11429, 3427307],
    [32139, 13114, 2462822], [78976, 26973, 5619715],
    [32857, 10455, 3192817], [29400, 12808, 3665615],
    [4667, 2876, 2556650], [21477, 10349, 6005812],
    [9168, 4617, 2878631], [385112, 127609, 4063576],
    [55522, 29954, 3632023], [155, 197, 507],
    [160, 106, 336], [25, 23, 669], [86, 96, 751], [199, 235, 515],
    [60, 83, 511], [8, 25, 187], [32, 59, 679], [11, 22, 365],
    [322, 244, 2001], [172, 229, 1110], [41, 48, 447], [109, 144, 2386],
    [23, 27, 319], [105, 204, 672], [77, 77, 2]])

    # Measured output, one value per row of xm (31 values).
    ym = np.array([90,85,91,90,90,82,81,85,83,83,72,78,
    74,92,90,28,26,13,12,22,25,5,10,15,50,54,4,28,10,7,6])

    # GEKKO model
    m = GEKKO(remote=False)

    # parameters
    # One Param per input column, so every data array handed to GEKKO is
    # 1-D and has the same length as ym.
    n = np.size(xm,1)
    x = m.Array(m.Param,n)
    for i in range(n):
        x[i].value = xm[:,i]
    # y carries the measured outputs as a controlled variable.
    y = m.CV(value=ym)
    y.FSTATUS = 1

    # Three regression coefficients, one per input column; STATUS is switched
    # on for each of them.
    a1 = m.FV()
    a1.STATUS=1

    a2 = m.FV()
    a2.STATUS=1

    a3 = m.FV()
    a3.STATUS=1

    # regression equation
    # Written once in terms of the GEKKO objects (m.log10, not np.log10);
    # no per-row loop is needed.
    m.Equation(y == m.log10(x[0]) * a1 + \
                    m.log10(x[1]) * a2 + \
                    m.log10(x[2]) * a3)

    # regression mode
    m.options.IMODE = 2

    # optimize
    m.solve(disp=True, GUI=False)

    # print parameters
    # Each coefficient is read as <fv>.value[0]; the previous
    # a1.value.value[0] applied .value twice, unlike a2 and a3.
    print('Optimized, a = ', str(a1.value[0]), str(a2.value[0]), str(a3.value[0]))

    # Blue dots: model output (x-axis) against measured output (y-axis).
    plt.plot(y.value, ym, 'bo')
    # Red line: the diagonal from the origin to max(ym) on both axes.
    plt.plot([0,max(ym)],[0,max(ym)],'r-')
    plt.show()

推荐阅读