c++ - 为什么 Go 套接字比 C++ 套接字慢?
问题描述
我在 Go 和 C++ 中对一个简单的套接字乒乓测试进行了基准测试。客户端首先向服务器发送 0。服务器增加它获得的任何数字并将其发送回客户端。客户端将数字回显给服务器,并在数字为 1,000,000 时停止。
客户端和服务器都在同一台计算机上,所以我在这两种情况下都使用 Unix 套接字。(我还尝试了相同主机的 TCP 套接字,结果相似)。
Go 测试需要 14 秒,而 C++ 测试需要 8 秒。这让我感到惊讶,因为我已经运行了相当多的 Go 与 C++ 基准测试,而且通常只要我不触发垃圾收集器,Go 的性能与 C++ 一样。
我使用的是 Mac,尽管评论者也报告说 Go 版本在 Linux 上速度较慢。
想知道我是否错过了优化 Go 程序的方法,或者是否只是效率低下。
以下是我为执行测试而运行的命令以及测试结果。所有代码文件都粘贴在这个问题的底部。
运行 Go 服务器:
$ rm /tmp/go.sock
$ go run socketUnixServer.go
运行 Go 客户端:
$ go build socketUnixClient.go; time ./socketUnixClient
real 0m14.101s
user 0m5.242s
sys 0m7.883s
运行 C++ 服务器:
$ rm /tmp/cpp.sock
$ clang++ -std=c++11 tcpServerIncUnix.cpp -O3; ./a.out
运行 C++ 客户端:
$ clang++ -std=c++11 tcpClientIncUnix.cpp -O3; time ./a.out
real 0m8.690s
user 0m0.835s
sys 0m3.800s
代码文件
转到服务器:
// socketUnixServer.go
package main
import (
"log"
"net"
"encoding/binary"
)
func main() {
ln, err := net.Listen("unix", "/tmp/go.sock")
if err != nil {
log.Fatal("Listen error: ", err)
}
c, err := ln.Accept()
if err != nil {
panic(err)
}
log.Println("Connected with client!")
readbuf := make([]byte, 4)
writebuf := make([]byte, 4)
for {
c.Read(readbuf)
clientNum := binary.BigEndian.Uint32(readbuf)
binary.BigEndian.PutUint32(writebuf, clientNum+1)
c.Write(writebuf)
}
}
去客户端:
// socketUnixClient.go
package main
import (
"log"
"net"
"encoding/binary"
)
const N = 1000000
func main() {
c, err := net.Dial("unix", "/tmp/go.sock")
if err != nil {
log.Fatal("Dial error", err)
}
defer c.Close()
readbuf := make([]byte, 4)
writebuf := make([]byte, 4)
var currNumber uint32 = 0
for currNumber < N {
binary.BigEndian.PutUint32(writebuf, currNumber)
c.Write(writebuf)
// Read the incremented number from server
c.Read(readbuf[:])
currNumber = binary.BigEndian.Uint32(readbuf)
}
}
C++ 服务器:
// tcpServerIncUnix.cpp
// Server side C/C++ program to demonstrate Socket programming
// #include <iostream>
#include <unistd.h>
#include <stdio.h>
#include <sys/un.h>
#include <sys/socket.h>
#include <stdlib.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <unistd.h>
// Big Endian (network order)
unsigned int fromBytes(unsigned char b[4]) {
return b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24;
}
void toBytes(unsigned int x, unsigned char (&b)[4]) {
b[3] = x;
b[2] = x>>8;
b[1] = x>>16;
b[0] = x>>24;
}
int main(int argc, char const *argv[])
{
int server_fd, new_socket, valread;
struct sockaddr_un saddr;
int saddrlen = sizeof(saddr);
unsigned char recv_buffer[4] = {0};
unsigned char send_buffer[4] = {0};
server_fd = socket(AF_UNIX, SOCK_STREAM, 0);
saddr.sun_family = AF_UNIX;
strncpy(saddr.sun_path, "/tmp/cpp.sock", sizeof(saddr.sun_path));
saddr.sun_path[sizeof(saddr.sun_path)-1] = '\0';
bind(server_fd, (struct sockaddr *)&saddr, sizeof(saddr));
listen(server_fd, 3);
// Accept one client connection
new_socket = accept(server_fd, (struct sockaddr *)&saddr, (socklen_t*)&saddrlen);
printf("Connected with client!\n");
// Note: if /tmp/cpp.sock already exists, you'll get the Connected with client!
// message before running the client. Delete this file first.
unsigned int x = 0;
while (true) {
valread = read(new_socket, recv_buffer, 4);
x = fromBytes(recv_buffer);
toBytes(x+1, send_buffer);
write(new_socket, send_buffer, 4);
}
}
C++ 客户端:
// tcpClientIncUnix.cpp
// Server side C/C++ program to demonstrate Socket programming
// #include <iostream>
#include <unistd.h>
#include <stdio.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stdlib.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <unistd.h>
// Big Endian (network order)
unsigned int fromBytes(unsigned char b[4]) {
return b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24;
}
void toBytes(unsigned int x, unsigned char (&b)[4]) {
b[3] = x;
b[2] = x>>8;
b[1] = x>>16;
b[0] = x>>24;
}
int main(int argc, char const *argv[])
{
int sock, valread;
struct sockaddr_un saddr;
int opt = 1;
int saddrlen = sizeof(saddr);
// We'll be passing uint32's back and forth
unsigned char recv_buffer[4] = {0};
unsigned char send_buffer[4] = {0};
sock = socket(AF_UNIX, SOCK_STREAM, 0);
saddr.sun_family = AF_UNIX;
strncpy(saddr.sun_path, "/tmp/cpp.sock", sizeof(saddr.sun_path));
saddr.sun_path[sizeof(saddr.sun_path)-1] = '\0';
// Accept one client connection
if (connect(sock, (struct sockaddr *)&saddr, sizeof(saddr)) != 0) {
throw("connect failed");
}
int n = 1000000;
unsigned int currNumber = 0;
while (currNumber < n) {
toBytes(currNumber, send_buffer);
write(sock, send_buffer, 4);
// Read the incremented number from server
valread = read(sock, recv_buffer, 4);
currNumber = fromBytes(recv_buffer);
}
}
解决方案
首先,我确认这个问题中的 Go 程序的运行速度明显比 C++ 慢。我认为知道原因确实很有趣。
我用 分析了 Go 客户端和服务器,pprof
发现它syscall.Syscall
占用了总执行时间的 70%。根据这张票,Go 中的系统调用大约比 C 中慢 1.4 倍。
(pprof) top -cum
Showing nodes accounting for 18.78s, 67.97% of 27.63s total
Dropped 44 nodes (cum <= 0.14s)
Showing top 10 nodes out of 44
flat flat% sum% cum cum%
0.11s 0.4% 0.4% 22.65s 81.98% main.main
0 0% 0.4% 22.65s 81.98% runtime.main
18.14s 65.65% 66.05% 19.91s 72.06% syscall.Syscall
0.03s 0.11% 66.16% 12.91s 46.72% net.(*conn).Read
0.10s 0.36% 66.52% 12.88s 46.62% net.(*netFD).Read
0.16s 0.58% 67.10% 12.78s 46.25% internal/poll.(*FD).Read
0.06s 0.22% 67.32% 11.87s 42.96% syscall.Read
0.11s 0.4% 67.72% 11.81s 42.74% syscall.read
0.02s 0.072% 67.79% 9.30s 33.66% net.(*conn).Write
0.05s 0.18% 67.97% 9.28s 33.59% net.(*netFD).Write
我逐渐减少了Conn.Write
和Conn.Read
调用的数量,并相应地增加了缓冲区的大小,以使传输的字节数保持不变。结果是程序进行的这些调用越少,其性能就越接近 C++ 版本。
推荐阅读
- python-3.x - Python 如何读取一行,然后询问用户是否愿意移动到下一行?
- python - 如何通过 python 将 CSV 数据转换为 .md 文件并在此过程中修改特定列?
- ios - 如何将 TextField 大小设置为与 TextField 字符串相同的大小?
- python - Connect4 w/ AI Bots:最佳对角移动(python)
- python - 如何使用 xml.etree.ElementTree 将 XML 文件中的所有相关字段放入 Python 中的 pandas 数据框?
- bash - 在 bash 与命令行中分配变量的区别
- ios - WKWebView 不显示导航栏
- javascript - React Native:将 toast 消息传递到导航屏幕
- c++ - C++ 函数问题
- image - 在 Canvas 上放置 .png 文件的好方法?