首页 > 解决方案 > Julia 的表现优于 MATLAB?

问题描述

我一直在尝试优化一些以二维数组形式获取输入信号并将其与某个 FWHM 的高斯卷积的代码。这个 2D 信号数组的大小是 (671, 2001),我沿着第二个轴 - 时间轴进行卷积。在哪里,

time = 0:0.5:1000
nt = length(time)  # nt = 2001

我正在与 150 的 FWHM 高斯卷积,对应于大约 63.7 的标准偏差。然后我将初始信号填充 3 倍 FWHM,将原始开始索引之前的新值设置为 0,将原始结束索引之后的新值设置为最后一个时间点的信号。

因此,nt = 3801在填充之后。

我的 MATLAB 代码在 117.5043 秒内计算了这个卷积。这是通过每个轴上的嵌套 for 循环完成的 - 在 MATLAB 中应该很慢。MATLAB代码的主要位如下,

for it = 1:nt
    % scan convolution mask across time
    kernal  = preb*exp(-b*(tc-tc(it)).^2); % convolution mask - Gaussian
    N(it) = sum(kernal)*dt;                % normalise
  
    kernal = kernal/N(it); % ensure convolution integrates to 1

    % calculate convolution for each row
    for iq = 1:Nq
        WWc(iq,it) = dt*sum((WWc0(iq,1:nt).').*kernal(1:nt));
    end
end

我已经在 J​​ulia 中重写了这个,除了使用循环矩阵来构造一个高斯内核,这个有效地随着时间的推移移除了 foor 循环,并且要在所有时间内应用的内核现在完全由一个矩阵定义。然后,我要么循环遍历行并将此内核乘以每一行的信号,要么使用该mapslices函数。Julia 中的主要代码如下,

K = Circulant(G.Fx) # construct circulant matrix of gaussian function G.Fx
conv = zeros(nr, nt) # init convolved signal

#for i=1:nr # If using a loop instead of mapslice
#    conv[i, :] = transpose(sC[i, :]) * K
#end

conv = mapslices(x -> K*x, sC; dims=2)

在 Julia 中,这个卷积需要 368 秒(几乎是 MATLAB 的 3 倍慢),尽管使用循环矩阵来跳过一个 foo 循环并将其减少到为每行乘以两个大小为 (1, 3801) 和 (3801, 3801) 的数组.

@time 的输出:

368.047741 seconds (19.37 G allocations: 288.664 GiB, 14.30% gc time, 0.03% compilation time)

两者都在同一台计算机上运行,​​Julia 应该不会在这里胜过 MATLAB 吗?

更新:

再玩一下 Julia 代码,我已经根据循环重写了矩阵乘法,但它仍然很慢。尽管根据输入标志调用两种外部构造函数类型 - 高斯或洛伦兹,但卷积在函数内执行。我还为循环构造函数加载 SpecialMatrices.jl。我是 Julia 的新手,不确定这是否会使其变慢。当在函数的独立位上使用 @time 宏时,很明显每个部分都非常快,直到它到达最后实际计算卷积的循环。无论我这样做,这个循环都非常慢,无论是在循环内核上使用矩阵乘法,结合sum()函数的双嵌套循环,还是本身进行求和的三重嵌套循环。

using MAT
using SpecialMatrices

wd = " "

inputfile = matopen(wd*"/Signal.mat")
signal = read(inputfile, "pdW")
close(inputfile)

inputfile = matopen(wd*"/Vectors.mat")
qAng = read(inputfile, "qAng")
tvec = read(inputfile, "tt")

struct Gaussian
    Fx:: AbstractVector{Float64}
    sigma:: Float64
    height:: Float64
    fwhm:: Float64
    function Gaussian(fwhm:: Float64, x:: StepRangeLen{Float64}, x0:: Float64)
        sigma = fwhm/2sqrt(2log(2))
        height = 1/sigma*sqrt(2pi)
        Fx = height .* exp.( (-1 .* (x .- x0).^2)./ (2*sigma^2))
        new(Fx, sigma, height, fwhm)
    end
end

struct Lorentzian
    Fx:: AbstractVector{Float64}
    gamma:: Float64
    fwhm:: Float64
    function Lorentzian(fwhm:: Float64, x:: StepRangeLen{Float64}, x0:: Float64)
        gamma = fwhm/2
        Fx = 1 ./ ((pi * gamma) .* (1 .+ ((x .- x0)./ gamma).^2 ))
        new(Fx, gamma, fwhm)
    end
end


function convolve(S:: Array{Float64}, fwhm:: Float64, time:: Array{Float64}, type:: Int)

    nr:: Int64, nc:: Int64 = size(S)
    nt:: Int64 = length(time)
    dt:: Float64 = sum(diff(time, dims=2))/(length(time)-1)
    duration:: Float64 = fwhm*3 # must pad three time FHWM to deal with edges

    if dt != diff(time, dims=2)[1] ; error("Non-linear time range."); end
    if nr != nt && nc != nt; error("Inconsistent dimensions."); end
    if nc != nt; S = transpose(S); nr, nc = nc, nr; end # column always time axis

    tmin:: Float64 = minimum(time); tmax:: Float64 = maximum(time)
    tconv_min:: Float64 = tmin - duration ; tconv_max:: Float64 = tmax + duration
    tconv = tconv_min:dt:tconv_max # extended convoluted time vector
    ntc:: Int64 = length(tconv)
    padend:: Int64 = (duration/dt) # index at which 0 padding ends and signal starts
    padstart:: Int64 = (padend + tmax / dt) # index where signal ends and padding starts

    type == 0 ? Kv = Gaussian(fwhm, tconv, tconv[1]) : Kv = Lorentzian(fwhm, tconv, tconv[1])
    println("Convolving signal with a $(typeof(Kv)) function with FWHM: $(Kv.fwhm) fs.")

    K:: Array{Float64} = Circulant(Kv.Fx) # construct kernel from circ matrix
    sC = zeros(Float64, nr, ntc)
    sC[1:nr, 1:padend] .= S[1:nr, 1] # extended and pad signal
    sC[1:nr, padend:padstart] .= S
    sC[1:nr, padstart:end] .= S[1:nr, end]

    println("""
            Signal padded by 3*FWHM ( $(duration) fs ) forwards and backwards.
            Original time length: $(nt), Extended time: $(ntc), Diff: $(ntc-nt) steps.
            Padded signal size: $(size(sC)).
            Kernel size: $(size(K[1, :])).
            """)
    conv = zeros(Float64, nr, ntc)
    for t in 1:ntc
        for q in 1:nr
            #conv[q, t] = sum(sC[q, 1:ntc] .* K[t, :])*dt
            conv[q, t] = 0.0
            for j in 1:ntc
                conv[q, t] += sC[q, j] * K[t, j] * dt
            end
        end
    end

    return conv, tconv
end

fwhm = 100.0
conv, tconv = convolve(signal, fwhm, tvec, 0)

更新 2:工作快速代码,用于卷积随机信号的 wa 包装器。

using LinearAlgebra
using SpecialMatrices

function wrapper()
    signal = rand(670, 2001)
    tvec = 0:0.5:1000
    fwhm = 150.0
    Flag = 0
    conv = convolve(signal, tvec, fwhm, Flag)
end


function Gaussian(x:: StepRangeLen{Float64}, x0:: Float64, fwhm:: T) where {T<:Union{Float64, Int64}}
    sigma = fwhm/2sqrt(2log(2))
    height = 1/sigma*sqrt(2pi)
    Fx = height .* exp.( (-1 .* (x .- x0).^2)./ (2*sigma^2))
    Fx = Fx./sum(Fx)
end


function Lorentzian(x:: StepRangeLen{Float64}, x0:: Float64, fwhm:: T) where {T<:Union{Float64, Int64}}
    gamma = fwhm/2
    Fx = 1 ./ ((pi * gamma) .* (1 .+ ((x .- x0)./ gamma).^2 ))
    Fx = Fx./sum(Fx)
end


function generate_kernel(tconv:: StepRangeLen{Float64}, fwhm:: Float64, centre:: Float64, ctype:: Int64)
    if ctype == 0
        K = Gaussian(tconv, centre, fwhm)
    elseif cytpe == 1
        K = Lorentzian(tconv, centre, fwhm)
    else
        error("Only Gaussian and Lorentzian functions currently available.")
    end
    K = Matrix(Circulant(K))
    return K
end


function convolve(S:: Matrix{Float64}, time:: StepRangeLen{Float64}, fwhm:: Float64, ctype:: Int64)
    nr:: Int64, nc:: Int64 = size(S)
    dt:: Float64 = sum(diff(time, dims=1))/(length(time)-1)
    nt:: Int64 = length(time)
    duration:: Float64 = fwhm*3 # must pad three time FHWM to deal with edges
    tmin:: Float64 = minimum(time); tmax:: Float64 = maximum(time)
    if dt != diff(time, dims=1)[1] ; error("Non-linear time range."); end
    if nr != nt && nc != nt; error("Inconsistent dimensions."); end
    if nc != nt; S = copy(transpose(S)); nr, nc = nc, nr; end # column always time axis

    tconv_min:: Float64 = tmin - duration ; tconv_max:: Float64 = tmax + duration
    tconv = tconv_min:dt:tconv_max # extended convoluted time vector
    ntc = length(tconv)
    padend:: Int64 = (duration/dt) # index at which 0 padding ends and signal starts
    padstart:: Int64 = (padend + tmax / dt) # index where signal ends and padding starts

    K = generate_kernel(tconv, fwhm, tconv[padend], ctype)

    sC = zeros(Float64, nr, ntc)
    sC[1:nr, 1:padend] .= @view S[1:nr, 1] # extended and pad signal
    sC[1:nr, padend:padstart] .= S
    sC[1:nr, padstart:end] .= @view S[1:nr, end]

    conv = zeros(Float64, nr, ntc)
    conv = convolution_integral(sC, K, dt)

    S .= conv[:, 1:nt] # return convoluted signal w same original size

    return S
    end


function convolution_integral(signal:: Matrix{Float64}, kernel:: Matrix{Float64}, dt:: Float64)
    LinearAlgebra.BLAS.set_num_threads(Sys.CPU_THREADS)
    conv = signal * kernel
    conv .*= dt
    return conv
end

使用 BenchMarktools:

julia> @btime wrapper();
  128.438 ms (11125 allocations: 300.24 MiB)

填充信号有一些奇怪的切片,因为早期的填充在卷积之后翻转到时间扩展的末端。我认为与高斯核的居中有关。现在我只是将数组切回到它的原始尺寸而没有填充并得到我期望的结果。

标签: matlabfor-loopmatrixjuliamatrix-multiplication

解决方案


推荐阅读