c# - 是否可以创建性能类似于 System.Numerics.Vector4 的自定义向量类型?
问题描述
System.Numerics.Vector4 类型具有出色的性能,因为 CLR 可以对其进行优化以使用向量化 CPU 指令。但是,我想创建自己的自定义 4 元素单精度浮点向量类型,以便可以添加各种方便的方法、属性、特性(attribute)、接口等(也就是说,比用扩展方法所能做到的更多)。不幸的是,我自己的向量类型的性能远不如 System.Numerics.Vector4,即使它在内部使用了一个 System.Numerics.Vector4 也是如此。有没有办法让自定义向量类型获得与 System.Numerics.Vector4 相近的性能?
下面这个程序尝试通过在自定义向量类型中嵌套一个 System.Numerics.Vector4 来提高性能(但基本上失败了):
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
/// <summary>
/// Benchmark driver: times repeated summation of an array for three vector types
/// (System.Numerics.Vector4, MyVector and MyVectorSimd) and prints total and elapsed time.
/// </summary>
class Program
{
// Number of elements in each test array.
private const int ARR_LENGTH = 1000;
// Number of times the whole array is summed while the stopwatch is running.
private const int OUTER_LOOP = 1000000;
/// <summary>Runs the three benchmarks one after another. <paramref name="args"/> is not used.</summary>
static void Main(string[] args)
{
TestVector4();
TestMyVector();
TestMyVectorSimd();
}
/// <summary>Times summing an array of System.Numerics.Vector4 values.</summary>
static void TestVector4()
{
Vector4[] arr = new Vector4[ARR_LENGTH];
// Element i gets all four components set to i; this fill is not timed.
for(int i = 0; i < arr.Length; i++)
arr[i] = new Vector4(i, i, i, i);
Stopwatch sw = Stopwatch.StartNew();
Vector4 total = default;
for(int i = 0; i < OUTER_LOOP; i++)
{
// Reset per pass, so the value printed below is the sum of a single pass over the array.
total = default;
for(int j = 0; j < ARR_LENGTH; j++)
total += arr[j];
}
sw.Stop();
Console.WriteLine($"System.Numerics.Vector4: {total} ({sw.Elapsed})");
}
/// <summary>Times summing an array of MyVector values using MyVector's own + operator.</summary>
static void TestMyVector()
{
MyVector[] arr = new MyVector[ARR_LENGTH];
// Element i gets all four components set to i; this fill is not timed.
for(int i = 0; i < arr.Length; i++)
arr[i] = new MyVector(i, i, i, i);
Stopwatch sw = Stopwatch.StartNew();
MyVector total = default;
for(int i = 0; i < OUTER_LOOP; i++)
{
// Reset per pass, so the value printed below is the sum of a single pass over the array.
total = default;
for(int j = 0; j < ARR_LENGTH; j++)
total += arr[j];
}
sw.Stop();
Console.WriteLine($"MyVector: {total} ({sw.Elapsed})");
}
/// <summary>Times summing an array of MyVectorSimd values using MyVectorSimd's own + operator.</summary>
static void TestMyVectorSimd()
{
MyVectorSimd[] arr = new MyVectorSimd[ARR_LENGTH];
// Element i gets all four components set to i; this fill is not timed.
for(int i = 0; i < arr.Length; i++)
arr[i] = new MyVectorSimd(i, i, i, i);
Stopwatch sw = Stopwatch.StartNew();
MyVectorSimd total = default;
for(int i = 0; i < OUTER_LOOP; i++)
{
// Reset per pass, so the value printed below is the sum of a single pass over the array.
total = default;
for(int j = 0; j < ARR_LENGTH; j++)
total += arr[j];
}
sw.Stop();
Console.WriteLine($"MyVectorSimd: {total} ({sw.Elapsed})");
}
}
/// <summary>
/// Four-component single-precision vector stored as four plain float fields,
/// with addition implemented component by component.
/// </summary>
struct MyVector
{
// The four components, exposed as public mutable fields.
public float X, Y, Z, W;
/// <summary>Initializes each component from the argument of the same name.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public MyVector(float x, float y, float z, float w)
{
X = x;
Y = y;
Z = z;
W = w;
}
/// <summary>Formats the four components as a comma-separated list enclosed in angle brackets.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public override string ToString()
{
return $"<{X}, {Y}, {Z}, {W}>";
}
/// <summary>
/// Adds two vectors component by component.
/// </summary>
/// <param name="left">First operand, received by value.</param>
/// <param name="right">Second operand, received by value.</param>
/// <returns>The component-wise sum.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public static MyVector operator +(MyVector left, MyVector right)
{
// left is a by-value copy, so these writes change only the local copy that is returned,
// not the caller's operand.
// NOTE: in the timings reported with this listing, this by-value/mutating form was
// roughly 4.5x slower than System.Numerics.Vector4.
left.X += right.X;
left.Y += right.Y;
left.Z += right.Z;
left.W += right.W;
return left;
}
}
/// <summary>
/// Custom vector type that wraps a single System.Numerics.Vector4 field and
/// delegates addition and formatting to it.
/// </summary>
struct MyVectorSimd
{
// The wrapped vector value, exposed as a public mutable field.
public Vector4 V;
/// <summary>Builds the wrapped Vector4 from the four components.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public MyVectorSimd(float x, float y, float z, float w)
{
V = new Vector4(x, y, z, w);
}
/// <summary>Returns the wrapped Vector4's string representation.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public override string ToString()
{
return V.ToString();
}
/// <summary>
/// Adds two vectors using Vector4's own + operator on the wrapped values.
/// </summary>
/// <param name="left">First operand, received by value.</param>
/// <param name="right">Second operand, received by value.</param>
/// <returns>A vector wrapping the sum of the two wrapped values.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public static MyVectorSimd operator +(MyVectorSimd left, MyVectorSimd right)
{
// left is a by-value copy; the sum is stored into that copy and the copy is returned.
// NOTE: in the timings reported with this listing, this form was still roughly 3x
// slower than using System.Numerics.Vector4 directly.
left.V += right.V;
return left;
}
}
这个程序有 3 个测试。第一个测试 System.Numerics.Vector4 的性能。第二个测试 MyVector 的性能,它是一个使用简单逐元素相加的自定义向量类型。第三个测试 MyVectorSimd 的性能,它是一个内部嵌套了 System.Numerics.Vector4 的自定义向量类型。以下是在我的计算机上以 Release 版本在 .NET Core 3.1 上运行的结果:
System.Numerics.Vector4: <499500, 499500, 499500, 499500> (00:00:01.0635501)
MyVector: <499500, 499500, 499500, 499500> (00:00:04.8566430)
MyVectorSimd: <499500, 499500, 499500, 499500> (00:00:03.4586021)
如您所见,这两种自定义向量类型的性能都比 System.Numerics.Vector4 差得多,尽管内部使用 System.Numerics.Vector4 的那个仍然比不使用的要好一些。
所以重申我的问题:有没有办法让自定义向量类型的性能与 System.Numerics.Vector4 一样好?
解决方案
感谢 Christopher 在我原问题下的评论,我发现只需给运算符的 left 和 right 参数加上 in 关键字(并将运算符改写为创建新的返回值,而不是修改 left 的值),就足以触发优化。更新后的代码让三种向量变体的性能基本相同:
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
/// <summary>
/// Benchmark driver: times repeated summation of an array for three vector types
/// (System.Numerics.Vector4, MyVector and MyVectorSimd) and prints total and elapsed time.
/// </summary>
class Program
{
// Number of elements in each test array.
private const int ARR_LENGTH = 1000;
// Number of times the whole array is summed while the stopwatch is running.
private const int OUTER_LOOP = 1000000;
/// <summary>Runs the three benchmarks one after another. <paramref name="args"/> is not used.</summary>
static void Main(string[] args)
{
TestVector4();
TestMyVector();
TestMyVectorSimd();
}
/// <summary>Times summing an array of System.Numerics.Vector4 values.</summary>
static void TestVector4()
{
Vector4[] arr = new Vector4[ARR_LENGTH];
// Element i gets all four components set to i; this fill is not timed.
for(int i = 0; i < arr.Length; i++)
arr[i] = new Vector4(i, i, i, i);
Stopwatch sw = Stopwatch.StartNew();
Vector4 total = default;
for(int i = 0; i < OUTER_LOOP; i++)
{
// Reset per pass, so the value printed below is the sum of a single pass over the array.
total = default;
for(int j = 0; j < ARR_LENGTH; j++)
total += arr[j];
}
sw.Stop();
Console.WriteLine($"System.Numerics.Vector4: {total} ({sw.Elapsed})");
}
/// <summary>Times summing an array of MyVector values using MyVector's own + operator.</summary>
static void TestMyVector()
{
MyVector[] arr = new MyVector[ARR_LENGTH];
// Element i gets all four components set to i; this fill is not timed.
for(int i = 0; i < arr.Length; i++)
arr[i] = new MyVector(i, i, i, i);
Stopwatch sw = Stopwatch.StartNew();
MyVector total = default;
for(int i = 0; i < OUTER_LOOP; i++)
{
// Reset per pass, so the value printed below is the sum of a single pass over the array.
total = default;
for(int j = 0; j < ARR_LENGTH; j++)
total += arr[j];
}
sw.Stop();
Console.WriteLine($"MyVector: {total} ({sw.Elapsed})");
}
/// <summary>Times summing an array of MyVectorSimd values using MyVectorSimd's own + operator.</summary>
static void TestMyVectorSimd()
{
MyVectorSimd[] arr = new MyVectorSimd[ARR_LENGTH];
// Element i gets all four components set to i; this fill is not timed.
for(int i = 0; i < arr.Length; i++)
arr[i] = new MyVectorSimd(i, i, i, i);
Stopwatch sw = Stopwatch.StartNew();
MyVectorSimd total = default;
for(int i = 0; i < OUTER_LOOP; i++)
{
// Reset per pass, so the value printed below is the sum of a single pass over the array.
total = default;
for(int j = 0; j < ARR_LENGTH; j++)
total += arr[j];
}
sw.Stop();
Console.WriteLine($"MyVectorSimd: {total} ({sw.Elapsed})");
}
}
/// <summary>
/// Four-component single-precision vector stored as four plain float fields,
/// with addition implemented component by component.
/// </summary>
struct MyVector
{
// The four components, exposed as public mutable fields.
public float X, Y, Z, W;
/// <summary>Initializes each component from the argument of the same name.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public MyVector(float x, float y, float z, float w)
{
X = x;
Y = y;
Z = z;
W = w;
}
/// <summary>Formats the four components as a comma-separated list enclosed in angle brackets.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public override string ToString()
{
return $"<{X}, {Y}, {Z}, {W}>";
}
/// <summary>
/// Adds two vectors component by component.
/// </summary>
/// <param name="left">First operand, passed by read-only reference.</param>
/// <param name="right">Second operand, passed by read-only reference.</param>
/// <returns>A newly constructed vector holding the component-wise sum.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public static MyVector operator +(in MyVector left, in MyVector right)
{
// Neither operand is written to; the result is built as a fresh value.
// NOTE: in the timings reported with this listing, this form ran at essentially the
// same speed as System.Numerics.Vector4.
return new MyVector(left.X + right.X, left.Y + right.Y, left.Z + right.Z, left.W + right.W);
}
}
/// <summary>
/// Custom vector type that wraps a single System.Numerics.Vector4 field and
/// delegates addition and formatting to it.
/// </summary>
struct MyVectorSimd
{
// The wrapped vector value, exposed as a public mutable field.
public Vector4 V;
/// <summary>Builds the wrapped Vector4 from the four components.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public MyVectorSimd(float x, float y, float z, float w)
// Chains to the struct's parameterless constructor before V is assigned.
: this()
{
V = new Vector4(x, y, z, w);
}
/// <summary>Wraps an existing Vector4 value.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public MyVectorSimd(Vector4 v)
// Chains to the struct's parameterless constructor before V is assigned.
: this()
{
V = v;
}
/// <summary>Returns the wrapped Vector4's string representation.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public override string ToString()
{
return V.ToString();
}
/// <summary>
/// Adds two vectors using Vector4's own + operator on the wrapped values.
/// </summary>
/// <param name="left">First operand, passed by read-only reference.</param>
/// <param name="right">Second operand, passed by read-only reference.</param>
/// <returns>A newly constructed vector wrapping the sum of the two wrapped values.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public static MyVectorSimd operator +(in MyVectorSimd left, in MyVectorSimd right)
{
// Neither operand is written to; the result is built as a fresh value.
// NOTE: in the timings reported with this listing, this form ran at essentially the
// same speed as System.Numerics.Vector4.
return new MyVectorSimd(left.V + right.V);
}
}
结果:
System.Numerics.Vector4: <499500, 499500, 499500, 499500> (00:00:00.9987530)
MyVector: <499500, 499500, 499500, 499500> (00:00:01.0064586)
MyVectorSimd: <499500, 499500, 499500, 499500> (00:00:00.9739642)
编辑:这种做法对 Vector4 有效,但出于某种原因对 Vector2 和 Vector3 无效。我仍在寻找答案:如何创建性能与 System.Numerics 中对应类型相近的自定义 Vector2 或 Vector3。
推荐阅读
- php - if 语句中的增量值不显示在 else 语句中
- wordpress - WordPress:站点地图包含垃圾邮件网址
- arrays - 使用数组和函数打印 C 中的前 n 个元素 [就像 sort(a+m, a+n)]
- c# - 使用 LINQ 连接具有相同属性的字符串
- c - 如何仅将 txt 文件中的整数值读取到数组中?
- android - Flutter - 如果日期相同(今天),则 DatePicker 不会打开
- java - 使用从应用程序属性填充的 Hashmap 的自动装配 bean 不起作用
- git - 如何检查gerrit CL中是否未添加组?
- kivy - 基于其他小部件位置的 Kivy 位置线
- github - gitignore 问题 - 如何将 Windows 快捷方式文件添加到 gitignore