首页 > 解决方案 > Cythonizing python 3代码时未能实现有效的速度提升

问题描述

我对 Python 3 代码进行了 cythonized,但我未能加快它的速度。纯 Python 3 代码执行期间经过的时间约为 29 秒,而 cythonized 代码的执行时间约为 25 秒(详细信息如下)。我在 cythonized 代码中哪里出错了。如果你能帮助我,我会很高兴。我分别在纯 Python 3 代码、cythonized 代码和设置文件下面添加。

代码在 for 循环中运行 100 次。每个循环中使用的数组的大小如下:

  • 速度 = 3300

  • V = 3300

  • S = 3300 x 3300

  • 维尔斯 = 201

  • line_centers(在掩码中)= ~100

如有必要,我可以在这篇文章中添加示例数据。

import numpy as np
import numpy.linalg as la

def lsd(velos, V, S, vels, masks, Lambda=0.):

    m, n = len(vels), len(velos)
    Nmask = len(masks)
    V = V - 1

    M = np.zeros((n, m * len(masks)))
    for N, (line_centers, weights) in enumerate(masks):
        for l, lc in enumerate(line_centers):
            vi = velos - lc

            for j in range(m - 1):
                w = np.argwhere((vi < vels[j + 1]) & (vi > vels[j])).T[0]

                if len(w) == 0: continue

                M[w, j + N * m] = weights[l] * (vels[j + 1] - vi[w]) / (vels[j + 1] - vels[j])
                M[w, j + 1 + N * m] = weights[l] * (vi[w] - vels[j]) / (vels[j + 1] - vels[j])

    if np.abs(np.sum(M)) < 1e-8:
        return np.zeros((1, len(vels)))

    if Lambda:
        R = np.zeros((m * Nmask, m * Nmask))
        for i in range(1, m-1):
            R[i, i] = 2
            R[i-1, i] = -1
            R[i+1, i] = -1
        R[0, 0] = 1
        R[1, 0] = -1
        R[-1, -1] = 1
        R[-2, -1] = -1

    X = np.matmul(M.T, (S**2))
    XM = np.matmul(X, M)
    if Lambda:
        XM = XM + Lambda * R
    cc = np.matmul(X, V)

    Z, res, rank, s = la.lstsq(XM, cc, rcond=None)

    # ZT = Z.T
    # ccT = cc.T

    # Z_ = []
    # C_ = []
    # for i in range(len(Z)):
    #     Z_.append([])
    #     C_.append([])
    #     for N in range(Nmask):
    #         Z_[-1].append(Z[i][N * m: (N + 1) * m])
    #         C_[-1].append(cc[i][N * m: (N + 1) * m])

    return Z.T
import numpy as np
cimport numpy as np
import cython
# from libcpp.vector cimport vector

DTYPE = np.float
ctypedef np.double_t DTYPE_t
@cython.boundscheck(False)
# @cython.wraparound(False)
@cython.cdivision(False)
@cython.initializedcheck(True)
cpdef lsd(np.ndarray[DTYPE_t, ndim=1] velos, np.ndarray[DTYPE_t, ndim=2] V, np.ndarray[DTYPE_t, ndim=2] S,
          np.ndarray[DTYPE_t, ndim=1] vels, np.ndarray[DTYPE_t, ndim=3] masks, float Lambda=0.):

    cdef int m = vels.shape[0]
    cdef int n = velos.shape[0]
    cdef int Nmask = masks.shape[0]
    cdef int N, l, j, i
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] M = np.zeros((n, m * Nmask), dtype=DTYPE)
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] R = np.zeros((m * Nmask, m * Nmask), dtype=DTYPE)
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] X
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] XM
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] cc
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] Z
    cdef np.ndarray[DTYPE_t, ndim=1, mode='c'] line_centers, weights, vi
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] zeros = np.zeros((1, m), dtype=DTYPE)
    cdef np.ndarray w
    # cdef double lc

    V = V - 1
    for N in range(Nmask):
        line_centers = masks[N][0]
        weights = masks[N][1]
        for l in range(len(line_centers)):
            vi = velos - line_centers[l]

            for j in range(m - 1):
                # print(np.argwhere((vi < vels[j + 1]) & (vi > vels[j])).T[0])
                w = np.argwhere((vi < vels[j + 1]) & (vi > vels[j])).T[0]

                if len(w) == 0: continue

                M[w, j + N * m] = weights[l] * (vels[j + 1] - vi[w]) / (vels[j + 1] - vels[j])
                M[w, j + 1 + N * m] = weights[l] * (vi[w] - vels[j]) / (vels[j + 1] - vels[j])

    if np.abs(np.sum(M)) < 1e-8:
        return zeros

    if Lambda:
        for i in range(1, m-1):
            R[i, i] = 2
            R[i-1, i] = -1
            R[i+1, i] = -1
        R[0, 0] = 1
        R[1, 0] = -1
        R[-1, -1] = 1
        R[-2, -1] = -1

    X = np.matmul(M.T, (S**2))
    XM = np.matmul(X, M)
    if Lambda:
        XM = XM + Lambda * R
    cc = np.matmul(X, V)

    Z, _, _, _ = np.linalg.lstsq(XM, cc, rcond=None)

    # ZT = Z.T
    # ccT = cc.T

    # Z_ = []
    # C_ = []
    # for i in range(len(Z)):
    #     Z_.append([])
    #     C_.append([])
    #     for N in range(Nmask):
    #         Z_[-1].append(Z[i][N * m: (N + 1) * m])
    #         C_[-1].append(cc[i][N * m: (N + 1) * m])

    return Z.T
from setuptools import setup
from Cython.Build import cythonize
import sys
import numpy

setup(
    ext_modules=cythonize('LSD_Cythonize.pyx',
                          compiler_directives={'language_level' : sys.version_info[0]}),
                          include_dirs=[numpy.get_include()])

标签: python-3.xcythoncythonize

解决方案


推荐阅读