首页 > 解决方案 > 为什么有些用 Golang 编写的函数运行速度比用 Java 还要慢?

问题描述

我用 Golang 和 Java 测试了几个简单的函数。令我惊讶的是,Java 有时比 Golang 更快(尤其是在递归函数和标准库中的某些函数上,例如 math/rand.Rand)。我想知道为什么。下面是我用于测试的代码和运行结果。

Golang 代码:

package main

import (
    "fmt"
    "math/rand"
    "time"
)

// calPi estimates pi by Monte Carlo sampling. It draws pointCount points
// (x, y) with rand.Float64 from the package-level generator and counts those
// with x*x+y*y < 1; the estimate is four times the fraction of such points.
//
// No guard is applied to pointCount: a value of zero divides zero by zero
// and therefore yields NaN.
func calPi(pointCount int) float64 {
    hits := 0

    for remaining := pointCount; remaining > 0; remaining-- {
        px := rand.Float64()
        py := rand.Float64()

        if distSq := px*px + py*py; distSq < 1 {
            hits++
        }
    }

    return 4.0 * float64(hits) / float64(pointCount)
}

// fibonacci returns the c-th Fibonacci number computed by plain double
// recursion, with no memoization. Any c below 2, negative values included,
// is returned unchanged.
func fibonacci(c int64) int64 {
    if c >= 2 {
        twoBack := fibonacci(c - 2)
        oneBack := fibonacci(c - 1)
        return twoBack + oneBack
    }

    return c
}

// main runs four micro-benchmarks in sequence. For each one it prints a
// "Test N" header, the computed result, and the elapsed wall-clock time:
//
//   - Test 1: sum of i*i for one billion float64 values of i
//   - Test 2: fibonacci(50)
//   - Test 3: sum of one hundred million rand.Float64 draws
//   - Test 4: calPi with one hundred million points
//
// Every duration is printed with the same "Duration: %v\n" format. A
// time.Duration already renders with its own unit, so no " s" suffix is
// appended and each line ends in exactly one newline.
func main() {
    // Seed the package-level generator from the current Unix time.
    rand.Seed(time.Now().Unix())

    fmt.Printf("Test 1\n")

    startTime := time.Now()

    result := 0.0

    for i := 0.0; i < 1000000000; i = i + 1 {
        result += i * i
    }

    // Capture the elapsed time before printing so output is not measured.
    elapsed := time.Since(startTime)

    fmt.Printf("Result: %v\n", result)
    fmt.Printf("Duration: %v\n", elapsed)

    fmt.Printf("Test 2\n")

    startTime = time.Now()

    resultInt := fibonacci(50)

    elapsed = time.Since(startTime)

    fmt.Printf("Result: %v\n", resultInt)
    fmt.Printf("Duration: %v\n", elapsed)

    fmt.Printf("Test 3\n")

    startTime = time.Now()

    result = 0.0

    for i := 0.0; i < 100000000; i = i + 1 {
        result += rand.Float64()
    }

    elapsed = time.Since(startTime)

    fmt.Printf("Result: %v\n", result)
    fmt.Printf("Duration: %v\n", elapsed)

    fmt.Printf("Test 4\n")

    startTime = time.Now()

    result = calPi(100000000)

    elapsed = time.Since(startTime)

    fmt.Printf("Result: %v\n", result)
    fmt.Printf("Duration: %v\n", elapsed)
}

结果:

Test 1
Result: 3.333333328333552e+26
Duration: 1.449212507s
Test 2
Result: 12586269025
Duration: 1m31.645050682s
Test 3
Result: 4.999483069673434e+07
Duration: 2.534121566s
 sTest 4
Result: 3.14147056
Duration: 5.036491495s s

Java代码:

/**
 * Four micro-benchmarks: a floating-point summation loop, a naive recursive
 * Fibonacci, a loop over {@link Math#random()}, and a Monte Carlo estimate
 * of pi. {@link #main(String[])} runs them in order and prints each result
 * with its elapsed time in seconds.
 */
public class Performance {

    /** Number of nanoseconds in one millisecond. */
    private static final long NANOS_PER_MILLI = 1_000_000L;

    /**
     * Estimates pi by Monte Carlo sampling: draws {@code pointCount} points
     * (x, y) with {@link Math#random()} and counts those with
     * {@code x*x + y*y < 1}.
     *
     * @param pointCount number of random points to draw
     * @return four times the fraction of counted points; NaN when
     *         {@code pointCount} is zero, because zero is divided by zero
     */
    public static double calPi(int pointCount) {
        int inCircleCount = 0;

        for (int i = 0; i < pointCount; i++) {
            double x = Math.random();
            double y = Math.random();

            if (x * x + y * y < 1) {
                inCircleCount++;
            }
        }

        return (4.0 * inCircleCount) / pointCount;
    }

    /**
     * Evaluates {@code a * b / (c + 1) + a}. Not called by {@link #main(String[])}.
     *
     * @param a first operand
     * @param b second operand
     * @param c third operand; the divisor is {@code c + 1}
     * @return the value of the expression
     */
    public static double cal(double a, double b, double c) {
        return a * b / (c + 1) + a;
    }

    /**
     * Computes a Fibonacci number by plain double recursion, without
     * memoization.
     *
     * @param c index of the number to compute
     * @return the c-th Fibonacci number; any {@code c} below 2, negative
     *         values included, is returned unchanged
     */
    public static long fibonacci(long c) {
        if (c < 2) {
            return c;
        }
        return fibonacci(c - 2) + fibonacci(c - 1);
    }

    /**
     * Converts the time elapsed since {@code startNanos} into seconds,
     * truncated to whole milliseconds so the printed value keeps at most
     * three decimal places.
     *
     * @param startNanos an earlier reading of {@link System#nanoTime()}
     * @return elapsed seconds
     */
    private static float secondsSince(long startNanos) {
        long elapsedMillis = (System.nanoTime() - startNanos) / NANOS_PER_MILLI;
        return (float) elapsedMillis / 1000;
    }

    /**
     * Runs the four benchmarks and prints their results and durations.
     * Elapsed time is taken from {@link System#nanoTime()}, which is meant
     * for measuring intervals and is unaffected by wall-clock adjustments.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        System.out.println("Test 1");

        long startTime = System.nanoTime();

        double result = 0.0;

        for (double i = 0.0; i < 1000000000; i = i + 1) {
            result += i * i;
        }

        // Read the clock before printing so output is not measured.
        float duration = secondsSince(startTime);

        System.out.println("Result: " + result);
        System.out.println("Duration: " + duration + " s");

        System.out.println("Test 2");

        startTime = System.nanoTime();

        long resultInt = fibonacci(50);

        duration = secondsSince(startTime);

        System.out.println("Result: " + resultInt);
        System.out.println("Duration: " + duration + " s");

        System.out.println("Test 3");

        startTime = System.nanoTime();

        result = 0.0;

        for (double i = 0; i < 100000000; i = i + 1) {
            result += Math.random();
        }

        duration = secondsSince(startTime);

        System.out.println("Result: " + result);
        System.out.println("Duration: " + duration + " s");

        System.out.println("Test 4");

        startTime = System.nanoTime();

        result = calPi(100000000);

        duration = secondsSince(startTime);

        System.out.println("Result: " + result);
        System.out.println("Duration: " + duration + " s");

    }
}

结果:

Test 1
Result: 3.333333328333552E26
Duration: 2.948 s
Test 2
Result: 12586269025
Duration: 60.816 s
Test 3
Result: 4.9999087237930864E7
Duration: 2.448 s
Test 4
Result: 3.14147284
Duration: 4.786 s

测试2结果的差异真的让我震惊!请帮我找出原因,谢谢。如果有人能给我举个例子来展示 Golang(与 Java)的优势,那就更好了。

标签: java performance go

解决方案


Java 和 Golang 程序在执行之前都会被编译成机器码——对 Java 虚拟机而言,这正是 JIT(即时编译)所做的事情。就性能比较而言,两者各自生成的机器码之间必然存在不小的差异。

不幸的是,我无法访问 Java JIT 编译器生成的机器代码,但我们可以看看 Go 编译器 (v1.11.4-amd64) 为fibonacci函数生成了什么:

        # Go compiler output for fibonacci, simplified by hand for readability.
        # Quoted names ("c", "r", "temp") stand for stack positions.
        # Do the comparison: branch to @ELSE when c >= 2
        MOVQ    "c", AX
        CMPQ    AX, $2
        JGE     @ELSE
        # Save the func result (c itself, since c < 2)
        MOVQ    AX, "r"
        # Clean up and return: reload BP from 24(SP) and release 32 bytes of stack
        MOVQ    24(SP), BP
        ADDQ    $32, SP
        RET
@ELSE:
        # Compute fib(c - 2); the argument is written to the stack at (SP)
        LEAQ    -2(AX), CX
        MOVQ    CX, (SP)
        CALL    fibonacci
        # Save the call result, read back from the stack at 8(SP)
        MOVQ    8(SP), AX
        MOVQ    AX, "temp"
        # Compute fib(c - 1); c is reloaded from its stack position
        MOVQ    "c", CX
        DECQ    CX
        MOVQ    CX, (SP)
        CALL    fibonacci
        # Add previous results together
        MOVQ    16(SP), AX
        ADDQ    8(SP), AX
        # Save the func result
        MOVQ    AX, "r"
        # Clean up and return
        MOVQ    24(SP), BP
        ADDQ    $32, SP
        RET

请注意,这段代码并非编译器的原始输出,我对其做了一些修改以便阅读。带引号的变量表示栈上的位置。

我得出的结论是,虽然 Go 编译器确实采用了一些优化技术来生成性能更高的代码(请参阅编译器优化),但它在分配 CPU 寄存器方面表现不佳(与 C 编译器生成的代码相比),并且过于依赖栈,尤其是在传递返回值时——我认为这可能与语言本身的特性有关(例如支持多个返回值)。

更新 1

只是为了比较,这是 GCC (amd64) 为相同函数生成的机器代码:

        # Prologue: set up the frame pointer and save %r14 and %rbx, both used below
        pushq %rbp
        movq  %rsp, %rbp
        pushq %r14
        pushq %rbx
        # Do the comparison: copy c from %rdi into %rbx, branch to @ELSE when c >= 2
        movq  %rdi, %rbx
        cmpq  $2, %rbx
        jge @ELSE
        # Save "c" in "r" (the result is left in %rax)
        movq  %rbx, %rax
        jmp @RETURN
@ELSE:
        # Compute fib(c - 2); the argument is passed in %rdi
        leaq  -2(%rbx), %rdi
        callq fibonacci
        # Compute fib(c - 1), keeping the first result in %r14 across the call
        movq  %rax, %r14
        decq  %rbx
        movq  %rbx, %rdi
        callq fibonacci
        # Add previous results together
        addq  %r14, %rax
@RETURN:
        # Epilogue: restore the saved registers in reverse order and return
        popq  %rbx
        popq  %r14
        popq  %rbp
        retq

更新 2

话虽如此,我坚信在实际项目中,语言运行时(例如对象分配、垃圾收集、间接调用、动态加载、并发支持等)对程序整体性能的影响,要比函数层面的微优化大得多。


推荐阅读