首页 > 解决方案 > 优化独立 N 体仿真的方法

问题描述

我对并行计算和 Numba 包比较陌生。我正在为我惊人的并行 N 体模拟寻找优化方法。到目前为止,我已经使用 Numpy 数组、JIT 编译器和多处理应用了我所知道的一切。但是,我仍然没有达到我想要的速度(我看过他们的代码更快的视频)

我目前拥有的是一个相当简单的 python 积分器,它使用 Runge-Kutta 积分和两个运动方程。我在我的领域经常与数值积分器合作,所以我肯定想从你们那里学到更多的技巧。

我在下面发布了我的代码,但本质上,我有一个名为“Motion”的主要功能,它需要 2 个初始条件并在一定时间内整合它们的运动。我已经 JITTED 这个函数和它迭代调用的所有函数:“RK4”、“ODE”、“电场”。最后,我从 Multiprocessing 中调用池函数来并行化“运动”函数,并为其运行的每个模拟插入不同的初始条件。

同样,我已经实现了我所知道的所有类型的优化,但是我仍然对它的速度不太满意。我已经在下面发布了我的代码。如果有人能发现一种可以进一步优化的算法,那将非常有帮助和教育意义(至少对我来说)!感谢您的时间。

import numpy as np
import matplotlib.pyplot as plt
from numba import njit, prange
from time import time
from tqdm import tqdm
import multiprocessing as mp
from IPython.display import clear_output
from scipy import interpolate


"Electric Field Information"
A = np.float32(1.00E-04)
N_waves = np.int(19)
frequency =  np.linspace(37.5,46.5,N_waves)*1e-3 #Set of frequencies used for Electric Field 
m = np.int(20) #Azimuthal Wave Number
sigma = np.float32(0.5) #Gaussian Width of E wave in L
zeta = np.float32(1)

"Particle Information"
N_Particles = np.int(10000)
q = np.float32(-1) #Charge of electron
mass = np.float32(0.511e6) #Mass of Proton eV/c^2
FirstAdiabatic = np.float32(2000e10) #MeV/Gauss Adiabatic Invariant


"Runge-Kutta Paramters"
Total_Time = np.float32(10) #hours
Step_Size = np.float32(0.2) #second 
Plot_Time = np.float32(60) #seconds
time_array = np.arange(0, Total_Time*3600+Step_Size, Step_Size) #Convert to seconds and Add End Point
N_points = len(time_array)

Skip_How_Many = int(Plot_Time/Step_Size) #Used to shorten our data set and save RAM

"Constants"    
Beq = np.float64(31221.60592e-9) #nT
Re = np.float32(6371e3) #Meters
c = np.float32(2.998e8) #m/s

"Start Electric Field Code"
def wave_peak(omega): #Called once so no need to JIT or Optimize this
    L_sample = np.linspace(1,10,100) 
    phidot = -3*FirstAdiabatic / (q* (L_sample*Re)**2 * np.sqrt(1+ (2*FirstAdiabatic*Beq/ (mass*L_sample**3)) ) )
    phidot_to_L = interpolate.interp1d(phidot,L_sample, kind = 'cubic')
    L0i = phidot_to_L(omega/m)
    return L0i 
omega = 2*np.pi*frequency
L0i_wave = wave_peak(omega)
Phi0i_wave = np.linspace(0,2*np.pi,N_waves)
np.random.shuffle(Phi0i_wave)

@njit(nogil= True)
def Electric_Field(t,r):
    E0 = A*np.exp(-(r[0]-L0i_wave)**2 / (2*sigma**2))
    Delta = np.arctan2( (r[0] * (r[0]-L0i_wave)/sigma**2 - 1), (2*np.pi*r[0]/zeta) )
    Er = E0/m * np.sqrt( (2*np.pi*r[0]/zeta)**2 + (r[0]*(r[0]-L0i_wave)/sigma**2 -1)**2 ) * np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta + Delta)
    Ephi = E0*np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta)
    return np.sum(Er),np.sum(Ephi)
"End of Electric Field Code"

"Particle's ODE - Equation of Motion"
@njit(nogil= True)
def ODE(t,r):
    Er, Ephi = Electric_Field(t,r) #Pull out the electric so we only call it once.
    Ldot = Ephi * r[0]**3 / (Re*Beq)
    Phidot = -Er * r[0]**2 / (Re*Beq) - 3* FirstAdiabatic / (q*r[0]**2*Re**2) * 1/np.sqrt(2*FirstAdiabatic*Beq/ (r[0]**3*mass) + 1)
    return np.array([Ldot,Phidot])

@njit(nogil= True)
def RK4(t,r): #Standard Runge-Kutta Integration Algorthim
    k1 = Step_Size*ODE(t,r)
    k2 = Step_Size*ODE(t+Step_Size/2, r+k1/2)
    k3 = Step_Size*ODE(t+Step_Size/2, r+k2)
    k4 = Step_Size*ODE(t+Step_Size, r+k3)
    return r + k1/6 + k2/3 + k3/3 + k4/6

@njit(nogil= True)
def Motion(L0,Phi0): #Insert Inital Conditions and it will loop through the RK4 integrator and output all its positions.
    L_Array = np.zeros_like(time_array)
    Phi_Array = np.zeros_like(time_array)
    
    L_Array[0] = L0
    Phi_Array[0] = Phi0
    for i in range(1,N_points):
        L_Array[i], Phi_Array[i] = RK4(time_array[i-1], np.array([ L_Array[i-1],Phi_Array[i-1] ]) )
    
    return L_Array[::Skip_How_Many], Phi_Array[::Skip_How_Many] 
    #Skip_How_Many is used to take up less RAM space since we don't need that kind of percsion in our data

# Location = Motion(5,0)
# x = Location[0]*np.cos(Location[1])
# y = Location[0]*np.sin(Location[1])
# plt.plot(x,y,"o", markersize = 0.5)
# ts = time()
# Motion(5,0)
# print('Solo Time:', time() - ts)

"Getting my Arrays ready so I can index it"
Split = int(np.sqrt(N_Particles))
L0i = np.linspace(4.4,5.5,Split)
Phi0i = np.linspace(0,360,Split) / 180 * np.pi
L0_Grid = np.repeat(L0i,Split) 
# ^Here I want to run a meshgrid of L0i and Phi0, so I repeat L0i using this function and mod (%) the index on the Phi Function


#Create Appending Array
results = []
def get_results(result): #Call back to this array from Multiprocessing to append the results it gives per run.
    results.append(result)
    clear_output()
    print("Getting Results %0.2f" %(len(results)/N_Particles * 100), end='\r')
    
if __name__ == '__main__':
    #Call In Multiprocessing
    pool = mp.Pool(mp.cpu_count()) #Counting number of threads to start
    ts = time() #Timing this process. Begins here
    for ii in range(N_Particles): #Not too sure what this does, but it works - I assume it parallelizes this loop
        pool.apply_async(Motion, args = (L0_Grid[ii],Phi0i[int(ii%Split)]), callback=get_results)
        
    pool.close() #I'm not too sure what this does but everyone uses it, and it won't work without it
    pool.join()
    print('Time in MP parallel:', time() - ts) #Output Time

标签: pythonoptimizationjitnumbarunge-kutta

解决方案


我认为您的代码运行缓慢的主要原因是您的 Runge-Kutta 方法具有固定的时间步长。花式 ODE 求解器将选择允许可容忍误差量的最大时间步长。一个例子是 ODEPACK 的 LSODA ODE 求解器。

下面我使用NumbaLSODA 重写了您的代码。在我的电脑上,它可以将你的代码加速大约 200 倍。

import numpy as np
import matplotlib.pyplot as plt
from numba import njit, prange
from time import time
from tqdm import tqdm
import multiprocessing as mp
from scipy import interpolate

from NumbaLSODA import lsoda_sig, lsoda
from numba import cfunc
import numba as nb

"Electric Field Information"
A = np.float32(1.00E-04)
N_waves = np.int(19)
frequency =  np.linspace(37.5,46.5,N_waves)*1e-3 #Set of frequencies used for Electric Field 
m = np.int(20) #Azimuthal Wave Number
sigma = np.float32(0.5) #Gaussian Width of E wave in L
zeta = np.float32(1)

"Particle Information"
N_Particles = np.int(10000)
q = np.float32(-1) #Charge of electron
mass = np.float32(0.511e6) #Mass of Proton eV/c^2
FirstAdiabatic = np.float32(2000e10) #MeV/Gauss Adiabatic Invariant

"Runge-Kutta Paramters"
Total_Time = np.float32(10) #hours
Step_Size = np.float32(0.2) #second 
Plot_Time = np.float32(60) #seconds
time_array = np.arange(0, Total_Time*3600+Step_Size, Step_Size) #Convert to seconds and Add End Point
N_points = len(time_array)

Skip_How_Many = int(Plot_Time/Step_Size) #Used to shorten our data set and save RAM

"Constants"    
Beq = np.float64(31221.60592e-9) #nT
Re = np.float32(6371e3) #Meters
c = np.float32(2.998e8) #m/s

"Start Electric Field Code"
def wave_peak(omega): #Called once so no need to JIT or Optimize this
    L_sample = np.linspace(1,10,100) 
    phidot = -3*FirstAdiabatic / (q* (L_sample*Re)**2 * np.sqrt(1+ (2*FirstAdiabatic*Beq/ (mass*L_sample**3)) ) )
    phidot_to_L = interpolate.interp1d(phidot,L_sample, kind = 'cubic')
    L0i = phidot_to_L(omega/m)
    return L0i 
omega = 2*np.pi*frequency
L0i_wave = wave_peak(omega)
Phi0i_wave = np.linspace(0,2*np.pi,N_waves)
np.random.shuffle(Phi0i_wave)

@njit
def Electric_Field(t,r):
    E0 = A*np.exp(-(r[0]-L0i_wave)**2 / (2*sigma**2))
    Delta = np.arctan2( (r[0] * (r[0]-L0i_wave)/sigma**2 - 1), (2*np.pi*r[0]/zeta) )
    Er = E0/m * np.sqrt( (2*np.pi*r[0]/zeta)**2 + (r[0]*(r[0]-L0i_wave)/sigma**2 -1)**2 ) * np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta + Delta)
    Ephi = E0*np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta)
    return np.sum(Er),np.sum(Ephi)
"End of Electric Field Code"

"Particle's ODE - Equation of Motion"
@cfunc(lsoda_sig)
def ODE(t, r_, dr, p):
    r = nb.carray(r_, (2,))
    Er, Ephi = Electric_Field(t,r)
    Ldot = Ephi * r[0]**3 / (Re*Beq)
    Phidot = -Er * r[0]**2 / (Re*Beq) - 3* FirstAdiabatic / (q*r[0]**2*Re**2) * 1/np.sqrt(2*FirstAdiabatic*Beq/ (r[0]**3*mass) + 1)
    dr[0] = Ldot
    dr[1] = Phidot
funcptr = ODE.address
    
@njit
def Motion(L0,Phi0):
    u0 = np.array([L0,Phi0],np.float64)
    data = np.array([5.0])
    usol, success = lsoda(funcptr, u0, time_array, data)
    L_Array = usol[:,0]
    Phi_Array = usol[:,1]
    return L_Array[::Skip_How_Many], Phi_Array[::Skip_How_Many]
    #Skip_How_Many is used to take up less RAM space since we don't need that kind of percsion in our data

Location = Motion(5,0)
x = Location[0]*np.cos(Location[1])
y = Location[0]*np.sin(Location[1])
plt.plot(x,y,"o", markersize = 0.5)
ts = time()
Motion(5,0)
print('Solo Time:', time() - ts)

推荐阅读