python - 多 ODE 函数中的曲线拟合参数
问题描述
我不想使用 Python curve_fit和odeint实现诊断延迟对 COVID-19 传播的影响的SEIR 模型(几乎没有修改)。没有,我的代码是这样的:curve_fit
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
def func_ode(y,t,qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d):
S_q,S,E1,E2,H,R,D,V=y
dSq=qS*S-qSq*S_q
dS=qSq*S_q-(betaV1*V+betaV2*V+beta1*E1+beta2*E2)*S
dE1=(betaV1*V+beta1*E1)*S-(e1+eta1)*E1
dE2=(betaV2*V+beta2*E2)*S+e1*E1-(eta2+delta2+theta2)*E2
dH=theta2*E2-(etaH+deltaH)*H # theta is for recovered
dR=eta1*E1+eta2*E2+etaH*H # eta is for recovered
dD=delta2*E2+deltaH*H # delta is for death
dV=f1*E1+f2*E2+fH*H-d*V
dy=[dSq, dS, dE1, dE2, dH, dR, dD, dV]
return dy
if __name__ == "__main__":
## Parameters (dummy)
qS, qSq, betaV1, betaV2, beta1, beta2, e1, eta1, eta2, etaH, delta2, deltaH, theta2, f1, f2, fH, d = \
0, 1e-4, 4e-9, 1e-9, 4e-9, 1e-9, 1/100, 1/21, 1/104, 1/10, 1/200, 1/10400, 1/3.5, 1400, 1000, 1700, 144
## Initial (dummy)
y_0=[1000,100000000,10,1,0,0,0,100]
## Solve
t= np.linspace(1,60,60)
result=odeint(func_ode,y_0,t,args=(qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d))
## Plot
plt.plot(t, result[:, 0], label='Sq')
plt.plot(t, result[:, 1], label='S')
plt.plot(t, result[:, 2], label='E1')
plt.plot(t, result[:, 3], label='E2')
plt.plot(t, result[:, 4], label='H')
plt.plot(t, result[:, 5], label='R')
plt.plot(t, result[:, 6], label='D')
plt.plot(t, result[:, 7], label='V')
plt.legend(loc='best')
plt.xlabel('t')
plt.grid()
plt.show()
pass
要将优化参数与输入数据一起使用,这是我不工作的代码:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import os
import pandas as pd
def func_ode(y,t,qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d):
S_q,S,E1,E2,H,R,D,V=y
dSq=qS*S-qSq*S_q
dS=qSq*S_q-(betaV1*V+betaV2*V+beta1*E1+beta2*E2)*S
dE1=(betaV1*V+beta1*E1)*S-(e1+eta1)*E1
dE2=(betaV2*V+beta2*E2)*S+e1*E1-(eta2+delta2+theta2)*E2
dH=theta2*E2-(etaH+deltaH)*H # theta is for recovered
dR=eta1*E1+eta2*E2+etaH*H # eta is for recovered
dD=delta2*E2+deltaH*H # delta is for death
dV=f1*E1+f2*E2+fH*H-d*V
dy=[dSq, dS, dE1, dE2, dH, dR, dD, dV]
return dy
def func_y(t,qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d,y_0):
"""
Solution to the ODE y'(t) = f(t,y,parameters) with initial condition y(0) = y_0
"""
y = odeint(func_ode, y_0, t, args=(qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d))
return y[1:,:]
if __name__ == "__main__":
file_name='Data Dummy.xlsx'
current_path=os.getcwd()
file_path=os.path.join(current_path,file_name)
sheet_name='Sheet1'
df_raw=pd.read_excel(file_path,sheet_name=sheet_name)
numpy_data=df_raw[[
'Self-quarantine susceptible',
'Susceptible',
'E1 (OTG)',
'E2 (ODP)',
'H (Hospitalized: PDP + Positif)',
'R (Sembuh)',
'D (Meninggal)',
'V (Virus)'
]].to_numpy()
## Parameters (dummy)
qS, qSq, betaV1, betaV2, beta1, beta2, e1, eta1, eta2, etaH, delta2, deltaH, theta2, f1, f2, fH, d = \
0, 1e-4, 4e-9, 1e-9, 4e-9, 1e-9, 1/100, 1/21, 1/104, 1/10, 1/200, 1/10400, 1/3.5, 1400, 1000, 1700, 144
# Used Data
y_0=numpy_data[0,:].tolist()
numpy_data=numpy_data[1:60,:]
## Reference Time
number_of_reference_time,_=np.shape(numpy_data)
## Solve
param = qS, qSq, betaV1, betaV2, beta1, beta2, e1, eta1, eta2, etaH, delta2, deltaH, theta2, f1, f2, fH, d, y_0
t= np.linspace(1,number_of_reference_time,number_of_reference_time)
popt, cov = curve_fit(func_y, t, numpy_data,p0=[param])
qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d = popt
## Check Result
result=odeint(func_ode,y_0,t,args=(qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d))
## Plot
plt.plot(t, result[:, 0], label='Sq')
plt.plot(t, result[:, 1], label='S')
plt.plot(t, result[:, 2], label='E1')
plt.plot(t, result[:, 3], label='E2')
plt.plot(t, result[:, 4], label='H')
plt.plot(t, result[:, 5], label='R')
plt.plot(t, result[:, 6], label='D')
plt.plot(t, result[:, 7], label='V')
plt.legend(loc='best')
plt.xlabel('t')
plt.grid()
plt.show()
pass
错误结果显示:
File "...\Programs\Python\Python37\lib\site-packages\scipy\optimize\minpack.py", line 458, in func_wrapped
return func(xdata, *params) - ydata
ValueError: operands could not be broadcast together with shapes (58,8) (59,8)
似乎curve_fit
不适合odeint
有多个图表?或者我在这里错过了什么?
编辑:我将fixed编辑y[1:,:]
到. 另外,将输入更改为可以在pastebin中看到代码。现在问题变成了:y.flatten()
popt, cov = curve_fit(func_y, t, numpy_data,p0=[param])
popt, cov = curve_fit(func_y, t, numpy_data.flatten(),p0=[param])
numpy.array(list)
File "....py", line 164, in <module>
popt, cov = curve_fit(func_y, t, numpy_data.flatten(),p0=[param])
File "...\Python\Python37\lib\site-packages\scipy\optimize\minpack.py", line 752, in curve_fit
res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
File "...\Python\Python37\lib\site-packages\scipy\optimize\minpack.py", line 396, in leastsq
gtol, maxfev, epsfcn, factor, diag)
TypeError: Cannot cast array data from dtype('O') to dtype('float64') according to the rule 'safe'
解决方案
有几个问题:首先,错误消息是说两个数组ydata
的func(xdata, *params)
形状不同:(59, 8) 和 (58, 8)。那可能是因为你func_y
这样做:
return y[1:,:]
而且:您可能需要将您的y
数据和模型函数的结果“展平”为一维(472 个观察值),这样您就func_y
可以:
return y.flatten()
curve_fit
你和你一起跑
popt, cov = curve_fit(func_y, t, numpy_data.flatten(), p0=[param])
但是,还有另一个(AFAIK)curve_fit
无法处理的概念问题。看来您的函数的最后一个参数func_y()
是y_0
一个 8 元素数组,并且是 ODE 积分的下限,而不是曲线拟合中的可变参数。 curve_fit
期望模型函数的第一个参数是“自变量”(此处为 )的一维数组,t
并且所有参数都是将成为拟合变量的标量。
当你这样做
参数 = qS, qSq, betaV1, betaV2, beta1, beta2, e1, eta1, eta2, etaH, delta2, deltaH, theta2, f1, f2, fH, d, y_0
您正在制作一个包含 17 个变量和 1 个 8 元素数组的元组y_0
。 curve_fit
会这样做numpy.array(param)
,期望每个元素都是param
标量。最后一个元素是列表或数组,这会产生一个对象数组,它会给出您看到的错误消息。
为了更好地组织参数和拟合结果,包括可以轻松固定或给定界限的命名参数,以及探索参数值和预测中的不确定性的高级方法,您可以考虑使用lmfit
( https://lmfit.github.io/lmfit- py/ )。特别是,lmfit.Model
是一个曲线拟合类,它将通过名称识别您的函数参数。对于您的示例来说重要的是,它允许多个自变量,并允许这些自变量是任何 Python 类型(不限于一维数组)。 lmfit.Model
也为你做扁平化。使用lmfit
,您的示例代码将如下所示:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from lmfit import Model
def func_ode(y,t,qS,qSq,betaV1,betaV2,beta1,beta2,e1,
eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d):
Sq,S,E1,E2,H,R,D,V=y
dSq=qS*S-qSq*Sq
dS=qSq*Sq-(betaV1*V+betaV2*V+beta1*E1+beta2*E2)*S
dE1=(betaV1*V+beta1*E1)*S-(e1+eta1)*E1
dE2=(betaV2*V+beta2*E2)*S+e1*E1-(eta2+delta2+theta2)*E2
dH=theta2*E2-(etaH+deltaH)*H # theta is for recovered
dR=eta1*E1+eta2*E2+etaH*H # eta is for recovered
dD=delta2*E2+deltaH*H # delta is for death
dV=f1*E1+f2*E2+fH*H-d*V
dy=[dSq, dS, dE1, dE2, dH, dR, dD, dV]
return dy
numpy_data=np.array([.... ]) # taken from your pastebin example
def func_y(t, qS, qSq, betaV1, betaV2, beta1, beta2, e1, eta1,
eta2, etaH, delta2, deltaH, theta2, f1, f2, fH, d, y_0):
"""
Solution to the ODE y'(t) = f(t, y, parameters) with
initial condition y(0) = y_0
"""
return odeint(func_ode, y_0, t, args=(qS, qSq, betaV1, betaV2,
beta1, beta2, e1, eta1,
eta2, etaH, delta2, deltaH,
theta2, f1, f2, fH, d))
y_0 = numpy_data[0,:].tolist()
numpy_data = numpy_data[1:60,:]
number_of_reference_time, _ = np.shape(numpy_data)
t = np.linspace(1, number_of_reference_time, number_of_reference_time)
# create a model from your function, identifying the names of the
# independent variables (arguments to not treat as variables in the fit)
omodel = Model(func_y, independent_vars=['t', 'y_0'])
# make parameters for this model, using the argument names from
# your model function
params = omodel.make_params(qS=0, qSq=1e-4, betaV1=4e-9, betaV2=1e-9,
beta1=4e-9, beta2=1e-9, e1=1/100,
eta1=1/21, eta2=1/104, etaH=1/10,
delta2=1/200, deltaH=1/10400, theta2=1/3.5,
f1=1400, f2=1000, fH=1700, d=144)
# do the fit to `data` with `parameters` and passing in the
# independent variables explicitly
result = omodel.fit(numpy_data, params, t=t, y_0=y_0)
# print out fit statistics, best fit values, estimated uncertainties
# note: best fit parameters will be in `result.params['qS']`, etc
print(result.fit_report(min_correl=0.5))
# Plot the portions of the best fit results
plt.plot(t, result.best_fit[:, 0], label='Sq')
plt.plot(t, result.best_fit[:, 1], label='S')
plt.plot(t, result.best_fit[:, 2], label='E1')
plt.plot(t, result.best_fit[:, 3], label='E2')
plt.plot(t, result.best_fit[:, 4], label='H')
plt.plot(t, result.best_fit[:, 5], label='R')
plt.plot(t, result.best_fit[:, 6], label='D')
plt.plot(t, result.best_fit[:, 7], label='V')
plt.legend(loc='best')
plt.xlabel('t')
plt.grid()
plt.show()
这将打印出一份报告
[[Model]]
Model(func_y)
[[Fit Statistics]]
# fitting method = leastsq
# function evals = 162
# data points = 472
# variables = 17
chi-square = 4.1921e+18
reduced chi-square = 9.2134e+15
Akaike info crit = 17367.1373
Bayesian info crit = 17437.8060
[[Variables]]
qS: -0.01661105 +/- 0.00259955 (15.65%) (init = 0)
qSq: 1.2272e-04 +/- 2.5428e-05 (20.72%) (init = 0.0001)
betaV1: 4.5773e-09 +/- 6.9243e-10 (15.13%) (init = 4e-09)
betaV2: 7.6846e-10 +/- 1.7084e-10 (22.23%) (init = 1e-09)
beta1: 1.3770e-10 +/- 8.4682e-12 (6.15%) (init = 4e-09)
beta2: 6.0831e-10 +/- 1.1471e-10 (18.86%) (init = 1e-09)
e1: 0.04271070 +/- 0.00378279 (8.86%) (init = 0.01)
eta1: 0.00182043 +/- 3.7579e-04 (20.64%) (init = 0.04761905)
eta2: -0.01052990 +/- 5.4028e-04 (5.13%) (init = 0.009615385)
etaH: 0.12337818 +/- 0.01710376 (13.86%) (init = 0.1)
delta2: 0.00644283 +/- 5.9399e-04 (9.22%) (init = 0.005)
deltaH: 9.0316e-05 +/- 4.1630e-05 (46.09%) (init = 9.615385e-05)
theta2: 0.22640213 +/- 0.06697444 (29.58%) (init = 0.2857143)
f1: 447.226564 +/- 88.1621816 (19.71%) (init = 1400)
f2: -240.442614 +/- 30.5435276 (12.70%) (init = 1000)
fH: 3780.95590 +/- 543.897368 (14.39%) (init = 1700)
d: 173.743295 +/- 24.3128286 (13.99%) (init = 144)
[[Correlations]] (unreported correlations are < 0.500)
C(qS, deltaH) = -0.889
C(etaH, theta2) = -0.713
C(betaV1, f1) = -0.692
C(beta1, beta2) = -0.681
C(betaV2, etaH) = -0.673
C(qS, eta2) = -0.652
C(deltaH, d) = -0.651
C(betaV1, theta2) = 0.646
C(f1, d) = 0.586
C(eta2, deltaH) = 0.585
C(betaV2, d) = 0.582
C(qSq, betaV1) = -0.523
C(betaV2, f1) = 0.510
并制作这样的情节:
我不知道这是否是您正在寻找的最合适的,但我希望它可以帮助您入门。