首页 > 解决方案 > Linux Socket 超时适用于 WSL,但不适用于 Ubuntu

问题描述

我尝试在没有服务器的情况下运行 TCP 客户端。这个想法只是定期尝试连接。为此,客户端尝试连接到 localhost 上的端口 1500。

一段代码:

    // Create socket
    // NOTE(review): the "> 0" test only guards the printf; if socket() fails,
    // execution still continues to connect() with an invalid descriptor.
    if ((create_socket=socket (AF_INET, SOCK_STREAM, PF_UNSPEC)) > 0)
      printf ("Socket created\n");
    address.sin_family = AF_INET;
    address.sin_port = htons (1500);
    // The server address is taken from the first command-line argument; the
    // result of inet_aton() is not checked.
    inet_aton (argv[1], &address.sin_addr);

    // Connect to server
    // NOTE(review): the return value of connect() is discarded, so a failure
    // that is reported immediately goes unnoticed before select() is called.
    connect ( create_socket,
                    (struct sockaddr *) &address,
                    sizeof (address));

    // Watch only this socket, in the write set, for at most 2 seconds.
    FD_ZERO(&fdset);
    FD_SET(create_socket, &fdset);
    tv.tv_sec = 2;             /* 2 seconds timeout */
    tv.tv_usec = 0;

    rv = select(create_socket + 1, NULL, &fdset, NULL, &tv);
    if (rv == 1)
    {
        // select() reported the socket as writable: read the pending socket
        // error to decide whether the connection attempt succeeded.
        int so_error;
        socklen_t len = sizeof so_error;

        getsockopt(create_socket, SOL_SOCKET, SO_ERROR, &so_error, &len);

        if (so_error == 0)
        {
          printf ("Connection with server (%s) established \n",
          inet_ntoa (address.sin_addr));
        }
        else
        {
          printf("Error on connect: unsuccessfull\n");
          close (create_socket);
          // "continue" targets an enclosing loop that is not part of this excerpt.
          continue;
        }
    }
    else if (rv == 0)
    {
      // select() returned 0: nothing became writable within the timeout.
      printf("Timeout on connect\n");
      close (create_socket);
      continue;
    }
    else
    {
      // Any other return value of select() is treated as an error.
      printf("Error on connect\n");
      close (create_socket);
      continue;
    }

我已经在 WSL 上的 Ubuntu 18.04 中运行了这段代码。在那里,代码会在 select 处等待所设定的 2 秒超时,并返回相应的返回值(0 表示超时,1 表示已连接)。在 WSL 和 VMware 上,connect 的返回值都是 -1。而在 Ubuntu 18 (VMware) 中,select 这一行完全没有停顿:无论如何,即使没有任何服务器监听该端口,我也会立即得到返回值 1。

为什么会有这种差异?

稍后在该代码中有类似的行为:

    // Configure a 2-second receive timeout on the socket.
    tv.tv_sec = 2;
    tv.tv_usec = 0;
    if (setsockopt(create_socket, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof tv) < 0)
    {
      printf("Error on setsockopt SO_RCVTIMEO");
      exit(EXIT_FAILURE);
    }
    // INNER LOOP: Receive data
    do
    {
      // Read at most BUF-1 bytes so there is room for the terminating '\0'.
      size = recv(create_socket, buffer, BUF-1, 0);
      if( size > 0)
      {
        buffer[size] = '\0';
        printf ("Message received: %s\n", buffer);
      }
      else if (size == -1)
      {
      // on VMware, errno is 107 if there is no server, but coming to that line was not intended
      // NOTE(review): errno is not inspected, so every recv() failure is
      // printed as "Timeout"; 107 is presumably ENOTCONN on Linux - confirm.
        printf ("Timeout\n");
      }
      else //
      {
        // Remaining case: recv() returned 0.
        printf("Server offline\n");
        // GO BACK TO OUTER LOOP and reconnect
        break;
      }

在这里,在 WSL 中,recv 最多等待 2 秒以接收传入数据(但仅当上面的代码块(connect、select)表明连接有效时)。在 VMware 中,recv 会立即返回(即使没有建立连接)。

它只是偶然在 WSL 上工作吗?

命令行参数是服务器 IP,即 127.0.0.1。lsof 显示没有任何连接。


2020-11-18 更新

这是Bodo要求的完整代码

#include <iostream>
#include <vector>
#include <string>
#include <sys/types.h>
#include <sys/socket.h>
#include <cstring>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <fcntl.h>

#define BUF 1024
using namespace std;

// Reconnecting TCP client. The outer loop creates a socket and tries to
// connect to 127.0.0.1:15000; once the connection is considered established,
// the inner loop prints received data until "quit\n" arrives or recv()
// returns 0. argc and argv are not used.
int main (int argc, char **argv) {
  int create_socket;
  // NOTE(review): the buffer is never initialised and never freed; both loop
  // conditions below read it even when nothing has been received yet.
  char *buffer =  (char*)malloc(BUF);
  struct sockaddr_in address;
  int size;
  int rv;
  struct timeval tv;
  fd_set fdset;

  // HERE STARTS THE OUTER LOOP - Connect and restart connection
  do
  {
    // Create socket
    // NOTE(review): the "> 0" test only guards the printf; a failed socket()
    // call is not handled.
    if ((create_socket=socket (AF_INET, SOCK_STREAM, PF_UNSPEC)) > 0)
      printf ("Socket created\n");
    address.sin_family = AF_INET;
    address.sin_port = htons (15000);
    inet_aton ("127.0.0.1", &address.sin_addr);

    // Connect to server
    // The file status flags are read but never written back (there is no
    // F_SETFL call), so the socket is not switched to non-blocking mode here.
    int flags = fcntl(create_socket, F_GETFL, 0);
    // "false" converts to 0, so this path makes main return 0.
    if (flags == -1) return false;
    rv = connect ( create_socket,
                    (struct sockaddr *) &address,
                    sizeof (address));

    printf ("Connect. rv = %i\n", rv);

    if (rv == -1)
    {
      // Only reports the error; execution continues to select() regardless.
      // NOTE(review): the default case also labels the value "(ECONNREFUSED)",
      // whatever errno actually is.
      switch (errno)
      {
        case ECONNREFUSED:  printf ("errno = %i (ECONNREFUSED)\n", errno); break;
        default:  printf ("errno = %i (ECONNREFUSED)\n", errno); break;
      }

    }

    // Watch only this socket, in the write set, for at most 2 seconds.
    FD_ZERO(&fdset);
    FD_SET(create_socket, &fdset);

    tv.tv_sec = 2;
    tv.tv_usec = 0;
    // NOTE(review): select() is called even when connect() has already failed
    // with an errno other than EINPROGRESS.
    rv = select(create_socket + 1, NULL, &fdset, NULL, &tv);
    if (rv == 1)
    {
        // The socket was reported writable: read the pending socket error to
        // decide whether the connection attempt succeeded.
        int so_error;
        socklen_t len = sizeof so_error;

        getsockopt(create_socket, SOL_SOCKET, SO_ERROR, &so_error, &len);

        if (so_error == 0)
        {
         printf ("Connection with server (%s) established \n",
          inet_ntoa (address.sin_addr));
        }
        else
        {
          printf("Error on connect: unsuccessfull\n");
          close (create_socket);
          // "continue" in a do-while jumps to the condition of the outer loop,
          // which compares the (possibly never written) buffer with "quit\n".
          continue;
        }
    }
    else if (rv == 0)
    {
      // select() returned 0: nothing became writable within the timeout.
      printf("Timeout on connect\n");
      close (create_socket);
      continue;
    }
    else
    {
      // Any other return value of select() is treated as an error.
      printf("Error on connect\n");
      close (create_socket);
      continue;
    }

    // NOTE(review): tv is reused without being reset after select(); on Linux
    // select() may have modified it to the remaining time - confirm the
    // intended receive timeout.
    if (setsockopt(create_socket, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof tv) < 0)
    {
      printf("Error on setsockopt SO_RCVTIMEO");
      exit(EXIT_FAILURE);
    }
    // INNER LOOP: Receive data
    do
    {
      // Read at most BUF-1 bytes so there is room for the terminating '\0'.
      size = recv(create_socket, buffer, BUF-1, 0);
      if( size > 0)
      {
        buffer[size] = '\0';
        printf ("Data received: %s\n", buffer);
      }
      else if (size == -1)
      {
        // NOTE(review): errno is not inspected, so every recv() failure is
        // printed as "Timeout", not only an expired SO_RCVTIMEO.
        printf ("Timeout\n");
      }
      else //
      {
        // Remaining case: recv() returned 0.
        printf("Server offline\n");
        // GO BACK TO OUTER LOOP and reconnect
        break;
      }
    // The inner loop ends once the buffer holds exactly "quit\n".
    } while (strcmp (buffer, "quit\n") != 0);
    close (create_socket);
  // The same "quit\n" test ends the outer loop; otherwise reconnect.
  } while (strcmp (buffer, "quit\n") != 0);
  return EXIT_SUCCESS;
}

在 WSL 中,输出是

Socket created
Connect. rv = -1
errno = 111 (ECONNREFUSED)

然后 2 秒内没有任何输出

Timeout on connect
Socket created
Connect. rv = -1
errno = 111 (ECONNREFUSED)

又是 2 秒没有任何输出...

VMware 中的输出

Socket created
Connect. rv = -1
errno = 111 (ECONNREFUSED)
Connection with server (127.0.0.1) established 
Timeout
Timeout
Timeout
Timeout

这里完全没有等待超时。

超时的想法是尝试定期连接,但不是尽可能快。

标签: linux sockets timeout windows-subsystem-for-linux

解决方案


当 errno = 111 (ECONNREFUSED) 后面紧跟着 Connection with server (127.0.0.1) established 时,很明显有什么地方不对。

如果 connect 返回 -1 且 errno 不是 EINPROGRESS,就不应该使用 select 和 getsockopt(...SO_ERROR...)。根据 https://man7.org/linux/man-pages/man2/connect.2.html ,这种用法只在 EINPROGRESS 的情况下有文档说明。

在真正的 Linux 和 WSL 上,connect 失败后都会得到 errno = 111 (ECONNREFUSED)。我认为 WSL 中出现的超时是错误的行为,因为错误(连接被拒绝)此前已经报告过了,再等待结果没有意义。但由于这种行为没有被规定,它可能取决于具体实现。

如果你想在下一次连接尝试之前加入延迟,就不应该使用 select,而应该使用例如 sleep,然后再重复循环。

我建议这样的事情:

    // Try to connect; on a definitive failure release the socket, wait, and
    // let the enclosing loop start a new attempt.
    rv = connect ( create_socket,
                    (struct sockaddr *) &address,
                    sizeof (address));

    printf ("Connect. rv = %i\n", rv);

    if (rv == -1)
    {
      // Save errno right away: the printf() calls below may overwrite it
      // before it is compared with EINPROGRESS.
      int connect_errno = errno;

      switch (connect_errno)
      {
        case ECONNREFUSED:  printf ("errno = %i (ECONNREFUSED) %s\n", connect_errno, strerror(connect_errno)); break;
        default:  printf ("errno = %i (other) %s\n", connect_errno, strerror(connect_errno)); break;
      }
      if(connect_errno != EINPROGRESS)
      {
        // The attempt failed for good. Close this descriptor before retrying,
        // because the loop creates a new socket on every iteration and would
        // otherwise leak one descriptor per failed attempt.
        close (create_socket);
        sleep(10); // chose a suitable delay before next connection attempt
        continue;
      }
    }


推荐阅读