首页 > 解决方案 > 是否可以创建性能类似于 System.Numerics.Vector4 的自定义向量类型?

问题描述

System.Numerics.Vector4 类型具有出色的性能,因为 CLR 可以对其进行优化以使用矢量化 CPU 指令。但是,我想创建自己的自定义 4 元素、单精度浮点向量类型,以便我可以添加各种方便的方法和属性(property)、特性(attribute)、接口等……(即,比仅用扩展方法能做到的更多。)不幸的是,我自己的向量类型的性能远不如 System.Numerics.Vector4,即使它在内部使用 System.Numerics.Vector4 也是如此。有没有办法让自定义向量类型获得与 System.Numerics.Vector4 类似的性能?

下面这个程序尝试(但基本上失败了)通过在自定义向量类型中嵌套一个 System.Numerics.Vector4 来提高性能:

using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;

/// <summary>
/// Micro-benchmark driver: sums the same 1000-element array one million times
/// for each of three vector types and prints the final sum and elapsed time.
/// </summary>
class Program
{
    // Number of vectors in each test array.
    private const int ElementCount = 1000;

    // Number of times the whole array is summed inside the timed region.
    private const int RepeatCount = 1000000;

    /// <summary>Runs the three benchmarks in order.</summary>
    static void Main(string[] args)
    {
        TestVector4();
        TestMyVector();
        TestMyVectorSimd();
    }

    /// <summary>Times repeated summation of a <see cref="Vector4"/> array.</summary>
    static void TestVector4()
    {
        var data = new Vector4[ElementCount];
        for (int n = 0; n < data.Length; n++)
        {
            data[n] = new Vector4(n, n, n, n);
        }

        var timer = Stopwatch.StartNew();
        var sum = default(Vector4);
        for (int pass = 0; pass < RepeatCount; pass++)
        {
            // Restart the accumulator so every pass produces the same sum.
            sum = default(Vector4);
            for (int k = 0; k < ElementCount; k++)
            {
                sum += data[k];
            }
        }
        timer.Stop();

        Console.WriteLine($"System.Numerics.Vector4: {sum}  ({timer.Elapsed})");
    }

    /// <summary>Times repeated summation of a <c>MyVector</c> array.</summary>
    static void TestMyVector()
    {
        var data = new MyVector[ElementCount];
        for (int n = 0; n < data.Length; n++)
        {
            data[n] = new MyVector(n, n, n, n);
        }

        var timer = Stopwatch.StartNew();
        var sum = default(MyVector);
        for (int pass = 0; pass < RepeatCount; pass++)
        {
            // Restart the accumulator so every pass produces the same sum.
            sum = default(MyVector);
            for (int k = 0; k < ElementCount; k++)
            {
                sum += data[k];
            }
        }
        timer.Stop();

        Console.WriteLine($"MyVector: {sum}  ({timer.Elapsed})");
    }

    /// <summary>Times repeated summation of a <c>MyVectorSimd</c> array.</summary>
    static void TestMyVectorSimd()
    {
        var data = new MyVectorSimd[ElementCount];
        for (int n = 0; n < data.Length; n++)
        {
            data[n] = new MyVectorSimd(n, n, n, n);
        }

        var timer = Stopwatch.StartNew();
        var sum = default(MyVectorSimd);
        for (int pass = 0; pass < RepeatCount; pass++)
        {
            // Restart the accumulator so every pass produces the same sum.
            sum = default(MyVectorSimd);
            for (int k = 0; k < ElementCount; k++)
            {
                sum += data[k];
            }
        }
        timer.Stop();

        Console.WriteLine($"MyVectorSimd: {sum}  ({timer.Elapsed})");
    }
}

/// <summary>
/// Hand-written 4-component single-precision vector that performs its
/// arithmetic one field at a time (no use of System.Numerics types).
/// </summary>
struct MyVector
{
    /// <summary>The four components, stored as separate public mutable fields.</summary>
    public float X, Y, Z, W;

    /// <summary>Initializes the four components from the given values.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public MyVector(float x, float y, float z, float w)
    {
        X = x;
        Y = y;
        Z = z;
        W = w;
    }

    /// <summary>Formats the components as <c>&lt;X, Y, Z, W&gt;</c> using string interpolation.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public override string ToString()
    {
        return $"<{X}, {Y}, {Z}, {W}>";
    }

    /// <summary>
    /// Component-wise addition. Both operands are passed by value; the local
    /// copy <paramref name="left"/> is mutated in place and then returned.
    /// </summary>
    /// <remarks>
    /// This is the form whose timing the question reports as roughly 4.9 s
    /// versus about 1.06 s for <c>System.Numerics.Vector4</c>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public static MyVector operator +(MyVector left, MyVector right)
    {
        left.X += right.X;
        left.Y += right.Y;
        left.Z += right.Z;
        left.W += right.W;
        return left;
    }
}

/// <summary>
/// Custom vector type that wraps a single <see cref="Vector4"/> field and
/// forwards its arithmetic to it.
/// </summary>
struct MyVectorSimd
{
    /// <summary>The wrapped <see cref="Vector4"/> holding all four components.</summary>
    public Vector4 V;

    /// <summary>Builds the wrapped <see cref="Vector4"/> from four component values.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public MyVectorSimd(float x, float y, float z, float w)
    {
        V = new Vector4(x, y, z, w);
    }

    /// <summary>Returns the wrapped vector's own string representation.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public override string ToString()
    {
        return V.ToString();
    }

    /// <summary>
    /// Adds the wrapped vectors. Both operands are passed by value; the local
    /// copy <paramref name="left"/> has its <c>V</c> field updated in place
    /// and is then returned.
    /// </summary>
    /// <remarks>
    /// This is the form whose timing the question reports as roughly 3.5 s
    /// versus about 1.06 s for a bare <c>Vector4</c>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public static MyVectorSimd operator +(MyVectorSimd left, MyVectorSimd right)
    {
        left.V += right.V;
        return left;
    }
}

这个程序有 3 个测试。第一个测试 System.Numerics.Vector4 的性能。第二个测试 MyVector 的性能,它是一个使用简单逐元素相加的自定义向量类型。第三个测试 MyVectorSimd 的性能,它是一个内部嵌套了 System.Numerics.Vector4 的自定义向量类型。以下是在我的计算机上以 Release 版本在 .NET Core 3.1 上运行的结果:

System.Numerics.Vector4: <499500, 499500, 499500, 499500>  (00:00:01.0635501)
MyVector: <499500, 499500, 499500, 499500>  (00:00:04.8566430)
MyVectorSimd: <499500, 499500, 499500, 499500>  (00:00:03.4586021)

如您所见,这两种自定义向量类型的性能都比 System.Numerics.Vector4 差得多,尽管内部使用 System.Numerics.Vector4 的那个仍然比不使用的要好一些。

所以重申我的问题:有没有办法让自定义向量类型达到与 System.Numerics.Vector4 相近的性能?

标签: c#, .net, .net-core

解决方案


感谢 Christopher 在我最初问题下的评论,我弄清楚了:只需给运算符的 left 和 right 参数加上 in 关键字(并重写运算符,使其创建新的返回值而不是修改 left 的值),就足以触发优化。更新后的代码让三种向量变体的性能基本相同:

using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

/// <summary>
/// Micro-benchmark driver: sums the same 1000-element array one million times
/// for each of three vector types and prints the final sum and elapsed time.
/// </summary>
class Program
{
    // Number of vectors in each test array.
    private const int ElementCount = 1000;

    // Number of times the whole array is summed inside the timed region.
    private const int RepeatCount = 1000000;

    /// <summary>Runs the three benchmarks in order.</summary>
    static void Main(string[] args)
    {
        TestVector4();
        TestMyVector();
        TestMyVectorSimd();
    }

    /// <summary>Times repeated summation of a <see cref="Vector4"/> array.</summary>
    static void TestVector4()
    {
        var data = new Vector4[ElementCount];
        for (int n = 0; n < data.Length; n++)
        {
            data[n] = new Vector4(n, n, n, n);
        }

        var timer = Stopwatch.StartNew();
        var sum = default(Vector4);
        for (int pass = 0; pass < RepeatCount; pass++)
        {
            // Restart the accumulator so every pass produces the same sum.
            sum = default(Vector4);
            for (int k = 0; k < ElementCount; k++)
            {
                sum += data[k];
            }
        }
        timer.Stop();

        Console.WriteLine($"System.Numerics.Vector4: {sum}  ({timer.Elapsed})");
    }

    /// <summary>Times repeated summation of a <c>MyVector</c> array.</summary>
    static void TestMyVector()
    {
        var data = new MyVector[ElementCount];
        for (int n = 0; n < data.Length; n++)
        {
            data[n] = new MyVector(n, n, n, n);
        }

        var timer = Stopwatch.StartNew();
        var sum = default(MyVector);
        for (int pass = 0; pass < RepeatCount; pass++)
        {
            // Restart the accumulator so every pass produces the same sum.
            sum = default(MyVector);
            for (int k = 0; k < ElementCount; k++)
            {
                sum += data[k];
            }
        }
        timer.Stop();

        Console.WriteLine($"MyVector: {sum}  ({timer.Elapsed})");
    }

    /// <summary>Times repeated summation of a <c>MyVectorSimd</c> array.</summary>
    static void TestMyVectorSimd()
    {
        var data = new MyVectorSimd[ElementCount];
        for (int n = 0; n < data.Length; n++)
        {
            data[n] = new MyVectorSimd(n, n, n, n);
        }

        var timer = Stopwatch.StartNew();
        var sum = default(MyVectorSimd);
        for (int pass = 0; pass < RepeatCount; pass++)
        {
            // Restart the accumulator so every pass produces the same sum.
            sum = default(MyVectorSimd);
            for (int k = 0; k < ElementCount; k++)
            {
                sum += data[k];
            }
        }
        timer.Stop();

        Console.WriteLine($"MyVectorSimd: {sum}  ({timer.Elapsed})");
    }
}

/// <summary>
/// Hand-written 4-component single-precision vector that performs its
/// arithmetic one field at a time (no use of System.Numerics types).
/// </summary>
struct MyVector
{
    /// <summary>The four components, stored as separate public mutable fields.</summary>
    public float X, Y, Z, W;

    /// <summary>Initializes the four components from the given values.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public MyVector(float x, float y, float z, float w)
    {
        X = x;
        Y = y;
        Z = z;
        W = w;
    }

    /// <summary>Formats the components as <c>&lt;X, Y, Z, W&gt;</c> using string interpolation.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public override string ToString()
    {
        return $"<{X}, {Y}, {Z}, {W}>";
    }

    /// <summary>
    /// Component-wise addition. Both operands are taken by read-only
    /// reference (<c>in</c>) and a newly constructed value is returned;
    /// neither operand is modified.
    /// </summary>
    /// <remarks>
    /// The answer credits this form (the <c>in</c> parameters plus returning a
    /// new value) with bringing the measured time to about 1.0 s, on par with
    /// <c>System.Numerics.Vector4</c>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public static MyVector operator +(in MyVector left, in MyVector right)
    {
        return new MyVector(left.X + right.X, left.Y + right.Y, left.Z + right.Z, left.W + right.W);
    }
}

/// <summary>
/// Custom vector type that wraps a single <see cref="Vector4"/> field and
/// forwards its arithmetic to it.
/// </summary>
struct MyVectorSimd
{
    /// <summary>The wrapped <see cref="Vector4"/> holding all four components.</summary>
    public Vector4 V;

    /// <summary>Builds the wrapped <see cref="Vector4"/> from four component values.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public MyVectorSimd(float x, float y, float z, float w)
        : this() // chains to the default struct initializer before V is assigned
    {
        V = new Vector4(x, y, z, w);
    }

    /// <summary>Wraps an existing <see cref="Vector4"/> value.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public MyVectorSimd(Vector4 v)
        : this() // chains to the default struct initializer before V is assigned
    {
        V = v;
    }

    /// <summary>Returns the wrapped vector's own string representation.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public override string ToString()
    {
        return V.ToString();
    }

    /// <summary>
    /// Adds the wrapped vectors. Both operands are taken by read-only
    /// reference (<c>in</c>) and a new wrapper around the sum is returned;
    /// neither operand is modified.
    /// </summary>
    /// <remarks>
    /// The answer credits this form with bringing the measured time to about
    /// 0.97 s, on par with a bare <c>Vector4</c>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
    public static MyVectorSimd operator +(in MyVectorSimd left, in MyVectorSimd right)
    {
        return new MyVectorSimd(left.V + right.V);
    }
}

结果:

System.Numerics.Vector4: <499500, 499500, 499500, 499500>  (00:00:00.9987530)
MyVector: <499500, 499500, 499500, 499500>  (00:00:01.0064586)
MyVectorSimd: <499500, 499500, 499500, 499500>  (00:00:00.9739642)

编辑:这适用于 Vector4,但出于某种原因不适用于 Vector2 或 Vector3。仍在寻找有关如何创建与 System.Numerics 中的性能相似的自定义 Vector2 或 Vector3 的答案。


推荐阅读