首页 > 解决方案 > 为什么 PACKET_TX_RING 的性能有时会下降,只能达到线速的一半

问题描述

我在 Ubuntu 16.04 LTS 上实现了一个 packet_send 程序,通过 PACKET_TX_RING(tx_ring)发送数据包。它有时可以达到线速,但有时只能达到线速的一半。

性能有时大约 20 秒后恢复,有时要几分钟才恢复,没有观察到明显的规律。

请帮我找出原因!

下面是测试数据。
sar 结果(线速 = 1 Gbps):

    16:34:08:830 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:08:830 PM     ens33     31.00  82718.00      2.20 119249.11      0.00      0.00      0.00     97.69
    
    16:34:09:830 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:09:830 PM     ens33     30.00  83301.00      2.14 120092.89      0.00      0.00      0.00     98.38
    16:34:10:831
    16:34:10:831 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:11:028 PM     ens33     29.00  84618.00      2.08 121982.30      0.00      0.00      0.00     99.93
    16:34:11:831
    16:34:11:831 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:12:118 PM     ens33     29.00  84285.00      2.08 121509.26      0.00      0.00      0.00     99.54
    16:34:12:831
    16:34:12:832 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:12:832 PM     ens33     30.00  83709.00      2.19 120664.26      0.00      0.00      0.00     98.85
    16:34:13:831
    16:34:13:832 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:13:832 PM     ens33     30.00  85626.00      2.14 123425.94      0.00      0.00      0.00    101.11
    16:34:14:831
    16:34:14:831 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:14:831 PM     ens33     29.00  85910.00      2.08 123829.60      0.00      0.00      0.00    101.44
    16:34:15:831
    16:34:15:831 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:15:832 PM     ens33     32.00  84821.00      2.26 122267.27      0.00      0.00      0.00    100.16
    16:34:16:831
    16:34:16:831 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:16:832 PM     ens33     31.00  86557.00      2.20 124772.29      0.00      0.00      0.00    102.21
    16:34:17:831
    16:34:17:831 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:17:832 PM     ens33     29.00  81088.00      2.08 116886.07      0.00      0.00      0.00     95.75
    16:34:18:832

从这里开始性能下降

    16:34:18:832 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:18:832 PM     ens33     29.00  54067.00      2.09  77973.99      0.00      0.00      0.00     63.88
    16:34:19:832
    16:34:19:832 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:19:833 PM     ens33     29.00  40137.00      2.08  57876.41      0.00      0.00      0.00     47.41
    16:34:20:832
    16:34:20:832 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:20:832 PM     ens33     29.00  39912.00      2.08  57556.60      0.00      0.00      0.00     47.15
    16:34:21:832
    16:34:21:832 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:21:832 PM     ens33     31.00  40215.00      2.20  57971.92      0.00      0.00      0.00     47.49
    16:34:22:832
    16:34:22:832 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:22:832 PM     ens33     29.00  40254.00      2.08  58033.12      0.00      0.00      0.00     47.54
    16:34:23:833
    16:34:23:833 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:23:833 PM     ens33     31.00  40245.00      2.20  58015.83      0.00      0.00      0.00     47.53
    16:34:24:833
    16:34:24:833 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:24:833 PM     ens33     31.00  40173.00      2.20  57945.90      0.00      0.00      0.00     47.47
    16:34:25:833
    16:34:25:833 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:25:833 PM     ens33     28.00  40478.00      2.02  58379.48      0.00      0.00      0.00     47.82
    16:34:26:833
    16:34:26:833 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:26:833 PM     ens33     32.00  40350.00      2.41  58168.46      0.00      0.00      0.00     47.65
    16:34:27:833
    16:34:27:833 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:27:834 PM     ens33     33.00  40399.00      2.47  58248.12      0.00      0.00      0.00     47.72

在这里恢复

    16:34:41:834 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:41:835 PM     ens33     31.00  76530.00      2.20 110334.69      0.00      0.00      0.00     90.39
    16:34:42:836
    16:34:42:836 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
    16:34:42:836 PM     ens33     26.00  85416.00      1.91 123126.53      0.00      0.00      0.00    100.87

top 命令的输出(第一段对应线速阶段,第二段对应性能下降阶段)

    16:34:08:611top - 14:40:50 up 10 days, 21:13,  3 users,  load average: 3.26, 2.72, 1.62
    16:34:08:611Tasks: 207 total,   2 running, 205 sleeping,   0 stopped,   0 zombie
    16:34:08:611%Cpu(s):  0.3 us, 27.4 sy,  0.0 ni, 45.4 id,  0.0 wa,  0.0 hi, 26.9 si,  0.0 st
    16:34:08:611KiB Mem :  4037752 total,   151772 free,  2171892 used,  1714088 buff/cache
    16:34:08:611KiB Swap:  4191228 total,  4191072 free,      156 used.  1487920 avail Mem 
    16:34:08:611
    16:34:08:611  PID USER      PR  NI    VIRT    RES    SHR S  %CPU %MEM     TIME+ COMMAND                                                                                                
    16:34:08:61161597 root      20   0 4817224 591332  81796 S 134.7 14.6 127:48.11 packet_send                                                                                                   
    16:34:08:611   18 root      20   0       0      0      0 R  79.2  0.0  57:36.18 ksoftirqd/2                                                                                                                                                                                    
    16:34:08:61145073 root      20   0       0      0      0 S   1.0  0.0   0:00.37 kworker/u128:1                                                                                         
    16:34:08:61249928 telnetd   20   0   19168   2116   1936 S   1.0  0.1   0:00.07 in.telnetd                                                                                                                                                                                         
    16:34:08:612    1 root      20   0   37736   5804   4020 S   0.0  0.1   0:10.96 systemd                                                                                                
    16:34:08:613    2 root      20   0       0      0      0 S   0.0  0.0   0:00.29 kthreadd                                                                                               
    16:34:08:613    3 root      20   0       0      0      0 S   0.0  0.0   0:05.73 ksoftirqd/0                                                                                            
    16:34:08:613    5 root       0 -20       0      0      0 S   0.0  0.0   0:00.00 kworker/0:0H                                                                                           
    16:34:08:613    7 root      20   0       0      0      0 S   0.0  0.0  25:54.40 rcu_sched                                                                                              
    16:34:08:613    8 root      20   0       0      0      0 S   0.0  0.0   0:00.05 rcu_bh                                                                                                 
    16:34:08:613    9 root      rt   0       0      0      0 S   0.0  0.0   1:30.57 migration/0                                                                                            
    16:34:08:614   10 root      rt   0       0      0      0 S   0.0  0.0   0:08.82 watchdog/0                                                                                             
    16:34:08:614   11 root      rt   0       0      0      0 S   0.0  0.0   0:11.23 watchdog/1                                                                                             
    16:34:08:614   12 root      rt   0       0      0      0 S   0.0  0.0   2:00.00 migration/1                                                                                            
    16:34:08:614   13 root      20   0       0      0      0 S   0.0  0.0   0:06.68 ksoftirqd/1                                                                                            
    16:34:08:614   15 root       0 -20       0      0      0 S   0.0  0.0   0:00.00 kworker/1:0H                                                                                           
    16:34:08:614   16 root      rt   0       0      0      0 S   0.0  0.0   0:11.27 watchdog/2                                                                                             
    16:34:08:614   17 root      rt   0       0      0      0 S   0.0  0.0   1:21.77 migration/2                                                                                            
    16:34:08:614   20 root       0 -20       0      0      0 S   0.0  0.0   0:00.00 kworker/2:0H                                                                                           
    16:34:08:614   21 root      rt   0       0      0      0 S   0.0  0.0   0:10.96 watchdog/3                                                                                             
    16:34:08:615   22 root      rt   0       0      0      0 S   0.0  0.0   1:21.40 migration/3                                                                                            
    16:34:08:615   23 root      20   0       0      0      0 S   0.0  0.0   0:06.05 ksoftirqd/3   

    16:34:19:657top - 14:41:01 up 10 days, 21:14,  3 users,  load average: 3.44, 2.78, 1.65
    16:34:19:657Tasks: 207 total,   1 running, 206 sleeping,   0 stopped,   0 zombie
    16:34:19:657%Cpu(s):  0.0 us, 21.9 sy,  0.0 ni, 72.9 id,  0.0 wa,  0.0 hi,  5.2 si,  0.0 st
    16:34:19:657KiB Mem :  4037752 total,   145764 free,  2177852 used,  1714136 buff/cache
    16:34:19:657KiB Swap:  4191228 total,  4191072 free,      156 used.  1481976 avail Mem 
    16:34:19:657
    16:34:19:657  PID USER      PR  NI    VIRT    RES    SHR S  %CPU %MEM     TIME+ COMMAND                                                                                                
    16:34:19:65761597 root      20   0 4817224 591332  81796 S 115.0 14.6 128:02.40 packet_send                                                                                                                                                                                           
    16:34:19:65847844 root      20   0   40516   3812   3172 R   1.0  0.1   0:01.62 top                                                                                                    
    16:34:19:65850061 xiejie    20   0    6096    776    708 S   1.0  0.0   0:00.03 sar                                                                                                                                                                                                    
    16:34:19:658    1 root      20   0   37736   5804   4020 S   0.0  0.1   0:10.96 systemd                                                                                                
    16:34:19:658    2 root      20   0       0      0      0 S   0.0  0.0   0:00.29 kthreadd                                                                                               
    16:34:19:658    3 root      20   0       0      0      0 S   0.0  0.0   0:05.73 ksoftirqd/0                                                                                            
    16:34:19:658    5 root       0 -20       0      0      0 S   0.0  0.0   0:00.00 kworker/0:0H                                                                                           
    16:34:19:658    7 root      20   0       0      0      0 S   0.0  0.0  25:54.45 rcu_sched                                                                                              
    16:34:19:658    8 root      20   0       0      0      0 S   0.0  0.0   0:00.05 rcu_bh                                                                                                 
    16:34:19:658    9 root      rt   0       0      0      0 S   0.0  0.0   1:30.57 migration/0                                                                                            
    16:34:19:658   10 root      rt   0       0      0      0 S   0.0  0.0   0:08.82 watchdog/0                                                                                             
    16:34:19:658   11 root      rt   0       0      0      0 S   0.0  0.0   0:11.23 watchdog/1                                                                                             
    16:34:19:658   12 root      rt   0       0      0      0 S   0.0  0.0   2:00.00 migration/1                                                                                            
    16:34:19:658   13 root      20   0       0      0      0 S   0.0  0.0   0:06.69 ksoftirqd/1                                                                                            
    16:34:19:658   15 root       0 -20       0      0      0 S   0.0  0.0   0:00.00 kworker/1:0H                                                                                           
    16:34:19:658   16 root      rt   0       0      0      0 S   0.0  0.0   0:11.27 watchdog/2                                                                                             
    16:34:19:658   17 root      rt   0       0      0      0 S   0.0  0.0   1:21.77 migration/2                                                                                            
    16:34:19:659   18 root      20   0       0      0      0 S   0.0  0.0  57:43.62 ksoftirqd/2                                                                                            
    16:34:19:659   20 root       0 -20       0      0      0 S   0.0  0.0   0:00.00 kworker/2:0H                                                                                           
    16:34:19:659   21 root      rt   0       0      0      0 S   0.0  0.0   0:10.96 watchdog/3                                                                                             
    16:34:19:659   22 root      rt   0       0      0      0 S   0.0  0.0   1:21.40 migration/3                                                                                            
    16:34:19:659   23 root      20   0       0      0      0 S   0.0  0.0   0:06.05 ksoftirqd/3

实现大致如下:

/*
 * Bookkeeping for one memory-mapped packet socket ring.
 *
 * The setup code stores the mmap() result in mm_space, the mapping length in
 * mm_len, and builds `frames` so that each entry points at one slot inside
 * mm_space. `s_ll` is the link-layer address the socket is bound with.
 */
struct ring {
    struct iovec *frames;       /* per-frame view: iov_base points into mm_space */
    uint8_t *mm_space;          /* start of the mmap()ed ring area */
    size_t mm_len;              /* length in bytes of the mmap()ed area */
    struct sockaddr_ll s_ll;    /* address passed to bind() */
    /* Ring geometry; the variant in use depends on the TPACKET version. */
    union {
        struct tpacket_req layout;      /* passed to setsockopt(PACKET_TX_RING) */
#ifdef HAVE_TPACKET3
        struct tpacket_req3 layout3;    /* TPACKET_V3 request, only when built with HAVE_TPACKET3 */
#endif
        uint8_t raw;                    /* NOTE(review): presumably a byte-wise alias of the request — confirm */
    };
};

/* Global TX ring state shared by the setup code and the transmit routine. */
struct ring tx_ring;
/*
 * tx_ring setup.
 *
 * NOTE(review): this is an excerpt. `sock`, `ret`, `discard`, `val`, `len`,
 * `num`, `size` and `i` are declared elsewhere, and none of the return
 * values of socket(), setsockopt(), mmap() or bind() are checked here.
 */
/* Raw packet socket; protocol 0 is given here, the binding happens below. */
sock = socket(PF_PACKET, SOCK_RAW, 0);
ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *) &discard, sizeof(discard)); /* set PACKET_LOSS; the value of `discard` is not shown */
/* Ring geometry: each block is four pages (16 KiB with PAGE_SIZE=4096). */
tx_ring.layout.tp_block_size = PAGE_SIZE << 2; /* PAGE_SIZE=4096 */
/* Frame size is 128 * TPACKET_ALIGNMENT (2048 bytes if TPACKET_ALIGNMENT is 16, as in <linux/if_packet.h>). */
tx_ring.layout.tp_frame_size = TPACKET_ALIGNMENT << 7; 
tx_ring.layout.tp_block_nr = 256;
/* Total frames = number of blocks * frames that fit in one block. */
tx_ring.layout.tp_frame_nr = tx_ring.layout.tp_block_nr*(tx_ring.layout.tp_block_size/tx_ring.layout.tp_frame_size);
/* The ring version is selected before the ring itself is requested. */
setsockopt(sock, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)); /* set version TPACKET_V2 */
setsockopt(sock, SOL_PACKET, PACKET_TX_RING, &tx_ring.layout, sizeof(tx_ring.layout));
/* Map the whole ring (block size * block count) into this process. */
tx_ring.mm_len = (size_t) tx_ring.layout.tp_block_size *  tx_ring.layout.tp_block_nr;
tx_ring.mm_space = mmap(NULL, tx_ring.mm_len, PROT_READ | PROT_WRITE,
                  MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0);
/*
 * Map frames: build one iovec per frame, each pointing `size` bytes further
 * into the mapping than the previous one.
 * NOTE(review): presumably len = tp_frame_nr * sizeof(struct iovec),
 * num = tp_frame_nr and size = tp_frame_size — confirm against the full source.
 */
tx_ring.frames = xzmalloc_aligned(len, CO_CACHE_LINE_SIZE);
    for (i = 0; i < num; ++i) {
        tx_ring.frames[i].iov_len = size;
        tx_ring.frames[i].iov_base = tx_ring.mm_space + (i * size);
    }
/* Bind the socket using the link-layer address stored in the ring state. */
bind(sock, (struct sockaddr *) &tx_ring.s_ll, sizeof(tx_ring.s_ll)); /* sll_family = AF_PACKET; sll_protocol = htons(ETH_P_ALL) */


/* implementation of the xmit routine */
/*
 * Fast-path transmit: copy up to packet_num packets into the mmap()ed TX
 * ring and mark each filled frame as ready for the kernel.
 *
 * The ring cursor `it` is static, so every call resumes at the frame where
 * the previous call stopped. When the frame under the cursor is not yet
 * available to user space, a non-blocking flush is issued and the same frame
 * is checked again on the next iteration.
 *
 * The loop stops early when `sigint` becomes non-zero or `plen` is zero.
 *
 * NOTE(review): `ctx` is not used in this body, and `packets[i]` is indexed
 * by an `i` that is not declared in this function — confirm which packet is
 * meant to be sent.
 */
static void xmit_fastpath(struct ctx *ctx, unsigned int packet_num)
{
    static unsigned int it = 0;
    unsigned long remaining = packet_num;

    while (likely(sigint==0 && remaining > 0 && plen > 0)) {
        /* user_may_pull_from_tx -> return !(tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)); */
        if (user_may_pull_from_tx(tx_ring.frames[it].iov_base)) {
            struct frame_map *frame = tx_ring.frames[it].iov_base;
            uint8_t *dst = ((uint8_t *) frame) + TPACKET2_HDRLEN - sizeof(struct sockaddr_ll);

            frame->tp_h.tp_snaplen = packets[i].len;
            frame->tp_h.tp_len = packets[i].len;
            memcpy(dst, packets[i].payload, packets[i].len);

            /* Hand the frame over only after header and payload are in place. */
            kernel_may_pull_from_tx(&frame->tp_h); /* tp_status = TP_STATUS_SEND_REQUEST; */

            remaining--;
            it = (it + 1) % tx_ring.layout.tp_frame_nr;
        } else {
            /*
             * Frame is still queued or being sent: kick the kernel with
             * sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0) and retry.
             * EBADF and ENOBUFS are tolerated; anything else is fatal.
             */
            if (unlikely(pull_and_flush_tx_ring(sock) < 0) &&
                errno != EBADF && errno != ENOBUFS)
                panic();
        }
    }
}

/*
 * Send one batch of frames.
 *
 * packetData/pkt_len describe the bytes that load_packets() copies into
 * `packets`; frameTotalNum is forwarded to xmit_fastpath() as the number of
 * frames to enqueue on the TX ring.
 */
void send_packets(u_int frameTotalNum, const u_char *packetData, u_int pkt_len)
{
    /* Stage the caller's data in `packets` before touching the ring. */
    load_packets(packetData, pkt_len);

    xmit_fastpath(ctx, frameTotalNum);
}
/*
 * Flush whatever is still queued on the TX ring, then close the socket and
 * release the context.
 *
 * The blocking flush is repeated while it fails with ENOBUFS, sleeping 10 ms
 * between attempts, for at most 100 retries. Any other outcome ends the loop.
 *
 * NOTE(review): `ctx` is accessed with `.` here but passed to free() and to
 * xmit_fastpath() as a pointer — confirm its type against the full source.
 */
void xmit_finish()
{
    int sock = dev_io_fd_get(ctx.dev_out);
    int retry = 100;

    for (;;) {
        if (pull_and_flush_tx_ring_wait(sock) >= 0)
            break;              /* flush succeeded */
        if (errno != ENOBUFS)
            break;              /* only ENOBUFS is worth retrying */
        if (retry-- <= 0)
            break;              /* retry budget exhausted */
        usleep(10000);
    }

    close(sock);
    free(ctx);
}
/*
 * Test driver: repeatedly sends batches of 1000 frames built from rawData,
 * then drains and tears down the transmit path.
 */
int main(int argc, char *argv[])
{
    char rawData[] = {}; /* fill test data like 1400 bytes data with l2&l3 header */
    int batch;

    /* Inclusive bound: 10000001 batches in total. */
    for (batch = 0; batch <= 10000000; ++batch)
        send_packets(1000, rawData, sizeof(rawData));

    xmit_finish(); /* clear */
    return 0;
}

标签: linux sockets ring tx

解决方案


推荐阅读