python - 优化独立 N 体仿真的方法
问题描述
我对并行计算和 Numba 包比较陌生。我正在为我惊人的并行 N 体模拟寻找优化方法。到目前为止,我已经使用 Numpy 数组、JIT 编译器和多处理应用了我所知道的一切。但是,我仍然没有达到我想要的速度(我看过他们的代码更快的视频)
我目前拥有的是一个相当简单的 python 积分器,它使用 Runge-Kutta 积分和两个运动方程。我在我的领域经常与数值积分器合作,所以我肯定想从你们那里学到更多的技巧。
我在下面发布了我的代码,但本质上,我有一个名为“Motion”的主要功能,它需要 2 个初始条件并在一定时间内整合它们的运动。我已经 JITTED 这个函数和它迭代调用的所有函数:“RK4”、“ODE”、“电场”。最后,我从 Multiprocessing 中调用池函数来并行化“运动”函数,并为其运行的每个模拟插入不同的初始条件。
同样,我已经实现了我所知道的所有类型的优化,但是我仍然对它的速度不太满意。我已经在下面发布了我的代码。如果有人能发现一种可以进一步优化的算法,那将非常有帮助和教育意义(至少对我来说)!感谢您的时间。
import numpy as np
import matplotlib.pyplot as plt
from numba import njit, prange
from time import time
from tqdm import tqdm
import multiprocessing as mp
from IPython.display import clear_output
from scipy import interpolate
"Electric Field Information"
A = np.float32(1.00E-04)
N_waves = np.int(19)
frequency = np.linspace(37.5,46.5,N_waves)*1e-3 #Set of frequencies used for Electric Field
m = np.int(20) #Azimuthal Wave Number
sigma = np.float32(0.5) #Gaussian Width of E wave in L
zeta = np.float32(1)
"Particle Information"
N_Particles = np.int(10000)
q = np.float32(-1) #Charge of electron
mass = np.float32(0.511e6) #Mass of Proton eV/c^2
FirstAdiabatic = np.float32(2000e10) #MeV/Gauss Adiabatic Invariant
"Runge-Kutta Paramters"
Total_Time = np.float32(10) #hours
Step_Size = np.float32(0.2) #second
Plot_Time = np.float32(60) #seconds
time_array = np.arange(0, Total_Time*3600+Step_Size, Step_Size) #Convert to seconds and Add End Point
N_points = len(time_array)
Skip_How_Many = int(Plot_Time/Step_Size) #Used to shorten our data set and save RAM
"Constants"
Beq = np.float64(31221.60592e-9) #nT
Re = np.float32(6371e3) #Meters
c = np.float32(2.998e8) #m/s
"Start Electric Field Code"
def wave_peak(omega): #Called once so no need to JIT or Optimize this
L_sample = np.linspace(1,10,100)
phidot = -3*FirstAdiabatic / (q* (L_sample*Re)**2 * np.sqrt(1+ (2*FirstAdiabatic*Beq/ (mass*L_sample**3)) ) )
phidot_to_L = interpolate.interp1d(phidot,L_sample, kind = 'cubic')
L0i = phidot_to_L(omega/m)
return L0i
omega = 2*np.pi*frequency
L0i_wave = wave_peak(omega)
Phi0i_wave = np.linspace(0,2*np.pi,N_waves)
np.random.shuffle(Phi0i_wave)
@njit(nogil= True)
def Electric_Field(t,r):
E0 = A*np.exp(-(r[0]-L0i_wave)**2 / (2*sigma**2))
Delta = np.arctan2( (r[0] * (r[0]-L0i_wave)/sigma**2 - 1), (2*np.pi*r[0]/zeta) )
Er = E0/m * np.sqrt( (2*np.pi*r[0]/zeta)**2 + (r[0]*(r[0]-L0i_wave)/sigma**2 -1)**2 ) * np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta + Delta)
Ephi = E0*np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta)
return np.sum(Er),np.sum(Ephi)
"End of Electric Field Code"
"Particle's ODE - Equation of Motion"
@njit(nogil= True)
def ODE(t,r):
Er, Ephi = Electric_Field(t,r) #Pull out the electric so we only call it once.
Ldot = Ephi * r[0]**3 / (Re*Beq)
Phidot = -Er * r[0]**2 / (Re*Beq) - 3* FirstAdiabatic / (q*r[0]**2*Re**2) * 1/np.sqrt(2*FirstAdiabatic*Beq/ (r[0]**3*mass) + 1)
return np.array([Ldot,Phidot])
@njit(nogil= True)
def RK4(t,r): #Standard Runge-Kutta Integration Algorthim
k1 = Step_Size*ODE(t,r)
k2 = Step_Size*ODE(t+Step_Size/2, r+k1/2)
k3 = Step_Size*ODE(t+Step_Size/2, r+k2)
k4 = Step_Size*ODE(t+Step_Size, r+k3)
return r + k1/6 + k2/3 + k3/3 + k4/6
@njit(nogil= True)
def Motion(L0,Phi0): #Insert Inital Conditions and it will loop through the RK4 integrator and output all its positions.
L_Array = np.zeros_like(time_array)
Phi_Array = np.zeros_like(time_array)
L_Array[0] = L0
Phi_Array[0] = Phi0
for i in range(1,N_points):
L_Array[i], Phi_Array[i] = RK4(time_array[i-1], np.array([ L_Array[i-1],Phi_Array[i-1] ]) )
return L_Array[::Skip_How_Many], Phi_Array[::Skip_How_Many]
#Skip_How_Many is used to take up less RAM space since we don't need that kind of percsion in our data
# Location = Motion(5,0)
# x = Location[0]*np.cos(Location[1])
# y = Location[0]*np.sin(Location[1])
# plt.plot(x,y,"o", markersize = 0.5)
# ts = time()
# Motion(5,0)
# print('Solo Time:', time() - ts)
"Getting my Arrays ready so I can index it"
Split = int(np.sqrt(N_Particles))
L0i = np.linspace(4.4,5.5,Split)
Phi0i = np.linspace(0,360,Split) / 180 * np.pi
L0_Grid = np.repeat(L0i,Split)
# ^Here I want to run a meshgrid of L0i and Phi0, so I repeat L0i using this function and mod (%) the index on the Phi Function
#Create Appending Array
results = []
def get_results(result): #Call back to this array from Multiprocessing to append the results it gives per run.
results.append(result)
clear_output()
print("Getting Results %0.2f" %(len(results)/N_Particles * 100), end='\r')
if __name__ == '__main__':
#Call In Multiprocessing
pool = mp.Pool(mp.cpu_count()) #Counting number of threads to start
ts = time() #Timing this process. Begins here
for ii in range(N_Particles): #Not too sure what this does, but it works - I assume it parallelizes this loop
pool.apply_async(Motion, args = (L0_Grid[ii],Phi0i[int(ii%Split)]), callback=get_results)
pool.close() #I'm not too sure what this does but everyone uses it, and it won't work without it
pool.join()
print('Time in MP parallel:', time() - ts) #Output Time
解决方案
我认为您的代码运行缓慢的主要原因是您的 Runge-Kutta 方法具有固定的时间步长。花式 ODE 求解器将选择允许可容忍误差量的最大时间步长。一个例子是 ODEPACK 的 LSODA ODE 求解器。
下面我使用NumbaLSODA 重写了您的代码。在我的电脑上,它可以将你的代码加速大约 200 倍。
import numpy as np
import matplotlib.pyplot as plt
from numba import njit, prange
from time import time
from tqdm import tqdm
import multiprocessing as mp
from scipy import interpolate
from NumbaLSODA import lsoda_sig, lsoda
from numba import cfunc
import numba as nb
"Electric Field Information"
A = np.float32(1.00E-04)
N_waves = np.int(19)
frequency = np.linspace(37.5,46.5,N_waves)*1e-3 #Set of frequencies used for Electric Field
m = np.int(20) #Azimuthal Wave Number
sigma = np.float32(0.5) #Gaussian Width of E wave in L
zeta = np.float32(1)
"Particle Information"
N_Particles = np.int(10000)
q = np.float32(-1) #Charge of electron
mass = np.float32(0.511e6) #Mass of Proton eV/c^2
FirstAdiabatic = np.float32(2000e10) #MeV/Gauss Adiabatic Invariant
"Runge-Kutta Paramters"
Total_Time = np.float32(10) #hours
Step_Size = np.float32(0.2) #second
Plot_Time = np.float32(60) #seconds
time_array = np.arange(0, Total_Time*3600+Step_Size, Step_Size) #Convert to seconds and Add End Point
N_points = len(time_array)
Skip_How_Many = int(Plot_Time/Step_Size) #Used to shorten our data set and save RAM
"Constants"
Beq = np.float64(31221.60592e-9) #nT
Re = np.float32(6371e3) #Meters
c = np.float32(2.998e8) #m/s
"Start Electric Field Code"
def wave_peak(omega): #Called once so no need to JIT or Optimize this
L_sample = np.linspace(1,10,100)
phidot = -3*FirstAdiabatic / (q* (L_sample*Re)**2 * np.sqrt(1+ (2*FirstAdiabatic*Beq/ (mass*L_sample**3)) ) )
phidot_to_L = interpolate.interp1d(phidot,L_sample, kind = 'cubic')
L0i = phidot_to_L(omega/m)
return L0i
omega = 2*np.pi*frequency
L0i_wave = wave_peak(omega)
Phi0i_wave = np.linspace(0,2*np.pi,N_waves)
np.random.shuffle(Phi0i_wave)
@njit
def Electric_Field(t,r):
E0 = A*np.exp(-(r[0]-L0i_wave)**2 / (2*sigma**2))
Delta = np.arctan2( (r[0] * (r[0]-L0i_wave)/sigma**2 - 1), (2*np.pi*r[0]/zeta) )
Er = E0/m * np.sqrt( (2*np.pi*r[0]/zeta)**2 + (r[0]*(r[0]-L0i_wave)/sigma**2 -1)**2 ) * np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta + Delta)
Ephi = E0*np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta)
return np.sum(Er),np.sum(Ephi)
"End of Electric Field Code"
"Particle's ODE - Equation of Motion"
@cfunc(lsoda_sig)
def ODE(t, r_, dr, p):
r = nb.carray(r_, (2,))
Er, Ephi = Electric_Field(t,r)
Ldot = Ephi * r[0]**3 / (Re*Beq)
Phidot = -Er * r[0]**2 / (Re*Beq) - 3* FirstAdiabatic / (q*r[0]**2*Re**2) * 1/np.sqrt(2*FirstAdiabatic*Beq/ (r[0]**3*mass) + 1)
dr[0] = Ldot
dr[1] = Phidot
funcptr = ODE.address
@njit
def Motion(L0,Phi0):
u0 = np.array([L0,Phi0],np.float64)
data = np.array([5.0])
usol, success = lsoda(funcptr, u0, time_array, data)
L_Array = usol[:,0]
Phi_Array = usol[:,1]
return L_Array[::Skip_How_Many], Phi_Array[::Skip_How_Many]
#Skip_How_Many is used to take up less RAM space since we don't need that kind of percsion in our data
Location = Motion(5,0)
x = Location[0]*np.cos(Location[1])
y = Location[0]*np.sin(Location[1])
plt.plot(x,y,"o", markersize = 0.5)
ts = time()
Motion(5,0)
print('Solo Time:', time() - ts)
推荐阅读
- python - Pandas:如何交换一行的单元格值,以便它们按字母顺序排列
- java - 如何在 pom.xml 中添加一个新块
- c - 从 struct ** 初始化 struct - segfault
- html - 如何防止用户在可滚动的 div 内向左/向右滚动时转到上一页/下一页?
- javascript - Vue v-for:在数组中单独迭代一个元素
- c# - form.serialize 不适用于字符串列表
- android - 表示层中的数据映射器
- awk - 当我使用 awk 时,“if”条件检查对我不起作用
- python - 如何将熊猫数据框转换为数据字典
- android - 如何将 alpha 通道添加到 xml 中的现有 Android 颜色