c# - 偏移大数组的 C# 内存访问优化(为什么这段代码很慢)
问题描述
我正在使用 C# 进行图像处理。在 C# 中以带偏移量的方式访问大块内存时,我遇到了性能问题:其速度明显慢于以零偏移访问相同大小的数据。而在 C++ 中,这种差异没有 C# 中那么大。
你能告诉我为什么我的代码有这个问题吗?另外,有什么解决办法吗?
源代码
using System;
using System.Runtime.InteropServices;
using System.Diagnostics;
using System.Numerics;
namespace Test
{
    /// <summary>
    /// Micro-benchmark that applies a 5x4 color matrix to every element of a
    /// width x height buffer of <see cref="Vector4"/> values held in unmanaged
    /// memory, and prints the elapsed time of the loop in milliseconds.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Allocates the source and destination buffers, runs the timed
        /// per-pixel transform, prints the elapsed milliseconds and releases
        /// the buffers.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        unsafe static void Main(string[] args)
        {
            var width = 8000;
            var height = 8000;

            // Compute the byte count in checked 64-bit arithmetic so that larger
            // dimensions fail loudly instead of silently wrapping an int.
            long byteCount = checked((long)width * height * sizeof(Vector4));

            Vector4* data = null;
            Vector4* data2 = null;
            try
            {
                // var data = new Vector4[height * width]; <- similar problem occur
                data = (Vector4*)Marshal.AllocHGlobal((IntPtr)byteCount);
                data2 = (Vector4*)Marshal.AllocHGlobal((IntPtr)byteCount);

                // NOTE(review): AllocHGlobal does not zero-fill, so the values read
                // below are indeterminate, and neither buffer is accessed before the
                // timed loop. The first touch of every element therefore happens
                // inside the measured region - presumably this contributes to the
                // measured time; confirm by pre-touching both buffers before timing.

                // MATRIX
                float m11 = .7297023F, m12 = 0, m13 = 0, m14 = 0, m21 = 0, m22 = .6109577F,
                      m23 = 0, m24 = 0, m31 = 0, m33 = .597218F, m32 = 0, m34 = 0, m41 = 0, m42 = 0,
                      m43 = 0, m44 = 1F, m51 = .105F, m52 = .145F, m53 = .155F, m54 = 0;

                var sw = Stopwatch.StartNew();
                for (int y = 0; y < height; ++y)
                {
                    // Index of the first element of row y.
                    var offset = width * y;
                    for (int x = 0; x < width; ++x)
                    {
                        // Slow ( 600ms )
                        ref var sData = ref data[offset + x];
                        ref var dData = ref data2[offset + x];
                        // Fast ( 200ms )
                        // ref var sData = ref data[x];
                        // ref var dData = ref data2[x];
                        float b = sData.X;
                        float g = sData.Y;
                        float r = sData.Z;
                        float a = sData.W;
                        dData.X = (b * m11) + (g * m21) + (r * m31) + (a * m41) + m51;
                        dData.Y = (b * m12) + (g * m22) + (r * m32) + (a * m42) + m52;
                        dData.Z = (b * m13) + (g * m23) + (r * m33) + (a * m43) + m53;
                        dData.W = (b * m14) + (g * m24) + (r * m34) + (a * m44) + m54;
                    }
                }
                sw.Stop();
                Console.WriteLine(sw.ElapsedMilliseconds);
            }
            finally
            {
                // Release the buffers even if allocation or the loop throws.
                // FreeHGlobal is a no-op for a null (IntPtr.Zero) handle.
                Marshal.FreeHGlobal((IntPtr)data);
                Marshal.FreeHGlobal((IntPtr)data2);
            }
        }
    }
}
使用托管数组指针时
var array1 = new Vector4[width * height];
var array2 = new Vector4[width * height];
fixed (Vector4* data = &array1[0])
fixed (Vector4* data2 = &array2[0])
for (int y = 0; y < height; ++y)
{
for (int x = 0; x < width; ++x)
{
// Slow ( 600ms )
ref var sData = ref data[width * y + x];
ref var dData = ref data2[width * y + x];
在外循环中偏移指针
(略有改进)
for (int y = 0; y < height; ++y)
{
var offsetData1 = data + width * y;
var offsetData2 = data2 + width * y;
for (int x = 0; x < width; ++x)
{
// Slow ( 470ms )
ref var sData = ref offsetData1[x];
ref var dData = ref offsetData2[x];
C++版本
#include <iostream>
#include <chrono>
// Four-component single-precision vector; every component defaults to zero.
struct Vector4 {
    float X{0.0f};
    float Y{0.0f};
    float Z{0.0f};
    float W{0.0f};
};
// Micro-benchmark: applies a 5x4 color matrix to every element of a
// width x height Vector4 buffer and prints the elapsed time in milliseconds.
int main()
{
    long width = 8000;
    long height = 8000;
    // Vector4 has default member initializers, so new[] zero-initializes every
    // element here, i.e. both buffers are fully written before the timer starts.
    auto buffer = new Vector4[width * height];
    auto buffer2 = new Vector4[width * height];
    // MATRIX
    float m11 = .7297023F, m12 = 0, m13 = 0, m14 = 0, m21 = 0, m22 = .6109577F,
          m23 = 0, m24 = 0, m31 = 0, m33 = .597218F, m32 = 0, m34 = 0, m41 = 0, m42 = 0,
          m43 = 0, m44 = 1, m51 = .105F, m52 = .145F, m53 = .155F, m54 = 0;
    // steady_clock is monotonic; system_clock may be adjusted while the loop
    // runs, which makes it unsuitable for measuring intervals.
    const auto start = std::chrono::steady_clock::now();
    // Loop counters use the same type as width/height to avoid narrowing the
    // row offset to int and mixed-width comparisons.
    for (long y = 0; y < height; ++y)
    {
        // Index of the first element of row y.
        const long offset = width * y;
        for (long x = 0; x < width; ++x)
        {
            Vector4& sData = buffer[offset + x];
            Vector4& dData = buffer2[offset + x];
            float b = sData.X;
            float g = sData.Y;
            float r = sData.Z;
            float a = sData.W;
            dData.X = (b * m11) + (g * m21) + (r * m31) + (a * m41) + m51;
            dData.Y = (b * m12) + (g * m22) + (r * m32) + (a * m42) + m52;
            dData.Z = (b * m13) + (g * m23) + (r * m33) + (a * m43) + m53;
            dData.W = (b * m14) + (g * m24) + (r * m34) + (a * m44) + m54;
        }
    }
    const auto end = std::chrono::steady_clock::now();
    // Whole milliseconds elapsed between the two time points.
    const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
    std::cout << elapsed << "\n";
    delete[] buffer;
    delete[] buffer2;
}
基准
语言 | 描述 | 时间(毫秒) |
---|---|---|
C# | 零偏移指针 | 200毫秒 |
C# | 偏移指针 | 600毫秒 |
C++ | 零偏移指针 | 190 毫秒 |
C++ | 偏移指针 | 260毫秒 |
C# | 在外循环中偏移指针 | 370毫秒 |
C# | 带偏移量的托管数组指针 | 990 毫秒 |
其他信息
中央处理器 | 英特尔酷睿 i7-6700k |
内存 | DDR4 16GB |
操作系统 | Windows 10 20H2 |
运行时 | .NET 5 |
语言版本 | C# 9 |
平台 | X64 |
解决方案
推荐阅读
- java - 将容器连接到mysql数据库并访问它
- angular - show navigation only in inner pages and not on login/registration pages
- flutter - 如何实现 Flutter 功能
- gmail-api - 请Gmail API html帖子示例
- java - 识别来自 git 存储库的参数更改
- android - Multiple checkbox state
- unit-testing - How to implement stub in Golang? And what difference between stub and mock?
- python - 如何获得shopify商店产品
- c# - sqlreader reading wrong column
- java - 通过Java中的构造函数调用没有实例的方法