首页 > 解决方案 > 为什么我的 D 代码查找素数比我的 C++ 代码快得多?

问题描述

我分别用 C++ 和 D(lang) 编写了两个小项目来计算给定数量的素数。两个项目中的代码非常相似。然而,我的 D 代码比我的 C++ 代码运行得快得多,尽管据说 C++ 更快。我使用 dmd 和 dub 来编译 D 代码,并使用 clang (LLVM 11.0) 和 Visual C++ 来编译我的 C++ 代码。我使用 Visual Studio Code 从命令行实际开发和编译我的 C++ 程序,尽管使用 -O3。如果某些变量名称不匹配,我很抱歉,我很快将我的代码从德语翻译过来。下面是我的代码:

C++ 实现:

主.cpp:

#include <iostream>
#include <chrono>
#include <vector>

#include "isqrt.hpp"

bool prime(int number)
{
    for(unsigned i = 2; i < isqrt(number)+1; i++)
    {
        if (!(number % i))
        {
            return false;
        }
    }
    return true;
}

int main(int argc, char *argv[])
{
    std::cout << "Prime numbers\n-------------------------\n" << std::endl;
    while (true)
    {
        std::cout << "Please enter the amount of prime numbers you want to get calculated: ";
        unsigned amount;
        std::cin >> amount;

        std::vector<unsigned> prime_numbers = {2,3,5};
        unsigned start = 6;
        bool p;
        std::chrono::system_clock::time_point before = std::chrono::system_clock::now();
        while(prime_numbers.size() < amount)
        {
            p = prime(start);
            if(p)
            {
                prime_numbers.push_back(start);
            }
            start++;
        }
        std::chrono::system_clock::time_point after = std::chrono::system_clock::now();
        //std::cout << prime_numbers << std::endl;
        std::chrono::system_clock::duration diff = after - before;
        std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << std::endl;
    }
    std::cout << "Why has thou forsaken me?" << std::endl;
    return 0;
}

iqrt.hpp:

#ifndef ISQRT_HPP
#define ISQRT_HPP

unsigned isqrt(unsigned number);

#endif

iqrt.cpp:

#include "isqrt.hpp"

unsigned isqrt(unsigned number) {
    if (!number) return 0;

    unsigned left = 1;
    unsigned right = (number >> 1) + 1;
    unsigned res;
    unsigned mid;
 
    while (left <= right) {
        mid = left + ((right-left) >> 1);
        if (mid*mid < number){
            left = mid+1;
            res=mid;
        }
        else {
            right=mid-1;
        }
    }
 
    return res;
}

D 实施:

主.d:

import std.stdio;
import std.datetime.stopwatch;

import isqrt;

/** Whether the number is a prime */
bool prime(int number)
{
    foreach(i; 2 .. iSqrt(number)+1)
    {
        if (!(number % i))
        {
            return false;
        }
    }
    return true;
}

void main()
{
    writeln("Prime numbers\n-------------------------\n");
    auto sw = StopWatch(AutoStart.no);
    int amount;
    while (true)
    {
        sw.reset();
        write("Please enter the amount of prime numbers that are to be calculated: ");
        readf("%d\n",amount);

        int[] prime_numbers = [2,3];
        int start = 5;
        bool p;
        sw.start();
        while(prime_numbers.length < amount)
        {
            p = prime(start);
            if(p)
            {
                primzahlen ~= start;
            }
            start++;
        }
        sw.stop();
        //writefln("%(%s%|, %)\n",prime_numbers);
        writeln(sw.peek.total!"msecs");
    }
}

iqrt.d:

module isqrt;

/** Int squareroot */

public uint iSqrt(uint number) {
    if (!number) return 0;

    uint left = 1;
    uint right = number >> 1 + 1;
    uint res;
    uint mid;
 
    while (left <= right) {
        mid = left + ((right-left) >> 1);
        if (mid<=number/mid){
            left = mid+1;
            res=mid;
        }
        else {
            right=mid-1;
        }
    }
 
    return res;
}

标签: c++d

解决方案


如果你稍微改变你的 main.cpp

diff main.old.cpp main.cpp
9c9,10
<     for(unsigned i = 2; i < isqrt(number)+1; i++)
---
>     int max = isqrt(number) + 1;
>     for(unsigned i = 2; i < max; i++)

并再次重建您的 C++ 代码

g++ -O3 -o cppisqrt isqrt.cpp main.cpp

你会看到 C++ 版本只比 D 版本慢一点

gdc -O3 -o disqrt isqrt.d main.d

我想公平一点,并使用相同的编译器后端(G++ 和 GDC 都使用 GCC 后端)。在我的家庭工作站上,C++ 变体需要约 1140 毫秒(10 个样本的平均值)来处理 200000 个数字,而 D 变体需要约 1090 毫秒(也是 10 个样本的平均值)。

G++ 和 GDC 都产生类似的代码:

https://godbolt.org上的 GCC 10.2 x86-64产生

isqrt(unsigned int):
        push    rbp
        mov     rbp, rsp
        mov     DWORD PTR [rbp-20], edi
        cmp     DWORD PTR [rbp-20], 0
        jne     .L2
        mov     eax, 0
        jmp     .L3
.L2:
        mov     DWORD PTR [rbp-4], 1
        mov     eax, DWORD PTR [rbp-20]
        shr     eax
        add     eax, 1
        mov     DWORD PTR [rbp-8], eax
.L7:
        mov     eax, DWORD PTR [rbp-4]
        cmp     eax, DWORD PTR [rbp-8]
        ja      .L4
        mov     eax, DWORD PTR [rbp-8]
        sub     eax, DWORD PTR [rbp-4]
        shr     eax
        mov     edx, eax
        mov     eax, DWORD PTR [rbp-4]
        add     eax, edx
        mov     DWORD PTR [rbp-16], eax
        mov     eax, DWORD PTR [rbp-16]
        imul    eax, eax
        cmp     DWORD PTR [rbp-20], eax
        jbe     .L5
        mov     eax, DWORD PTR [rbp-16]
        add     eax, 1
        mov     DWORD PTR [rbp-4], eax
        mov     eax, DWORD PTR [rbp-16]
        mov     DWORD PTR [rbp-12], eax
        jmp     .L7
.L5:
        mov     eax, DWORD PTR [rbp-16]
        sub     eax, 1
        mov     DWORD PTR [rbp-8], eax
        jmp     .L7
.L4:
        mov     eax, DWORD PTR [rbp-12]
.L3:
        pop     rbp
        ret

https://godbolt.org上的 GDC 10.2 x86-64产生

uint example.iSqrt(uint):
        push    rbp
        mov     rbp, rsp
        mov     DWORD PTR [rbp-20], edi
        cmp     DWORD PTR [rbp-20], 0
        jne     .L2
        mov     eax, 0
        jmp     .L3
.L2:
        mov     DWORD PTR [rbp-4], 1
        mov     eax, DWORD PTR [rbp-20]
        shr     eax, 2
        mov     DWORD PTR [rbp-8], eax
        mov     DWORD PTR [rbp-12], 0
        mov     DWORD PTR [rbp-16], 0
.L7:
        mov     eax, DWORD PTR [rbp-4]
        cmp     eax, DWORD PTR [rbp-8]
        ja      .L4
        mov     eax, DWORD PTR [rbp-8]
        sub     eax, DWORD PTR [rbp-4]
        shr     eax
        mov     edx, eax
        mov     eax, DWORD PTR [rbp-4]
        add     eax, edx
        mov     DWORD PTR [rbp-16], eax
        mov     eax, DWORD PTR [rbp-20]
        mov     edx, 0
        div     DWORD PTR [rbp-16]
        cmp     DWORD PTR [rbp-16], eax
        ja      .L5
        mov     eax, DWORD PTR [rbp-16]
        add     eax, 1
        mov     DWORD PTR [rbp-4], eax
        mov     eax, DWORD PTR [rbp-16]
        mov     DWORD PTR [rbp-12], eax
        jmp     .L7
.L5:
        mov     eax, DWORD PTR [rbp-16]
        sub     eax, 1
        mov     DWORD PTR [rbp-8], eax
        jmp     .L7
.L4:
        mov     eax, DWORD PTR [rbp-12]
.L3:
        pop     rbp
        ret

PS。public在这种情况下,D 版本不需要该说明符。


推荐阅读