您的位置:首页 > 运维架构 > Linux

linux诡异的半连接(SYN_RECV)队列长度2

2013-12-16 00:35 274 查看
继续上回:我们已经确认了全连接队列的长度计算,接下来继续寻找半连接队列长度。

试着慢慢减小tcp_max_syn_backlog的值,但还是看不到半连接状态数量的变化。

实在没什么思路,只能Google之,搜出来的基本都是关于SYN Flood的文章,难道没同学关注过半连接队列的长度吗?

困扰数日,终于在某个夜晚被我找到一篇题为《关于半连接队列的释疑》的文章,激动呐。根据作者提供的思路我开始翻代码。注意我用的内核版本是2.6.32,不同版本的代码会有差异。

首先定位到tcp_v4_conn_request函数,在文件net/ipv4/tcp_ipv4.c中。

?View
Code C

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct inet_request_sock *ireq;
struct tcp_options_received tmp_opt;
struct request_sock *req;
__be32 saddr = ip_hdr(skb)->saddr;
__be32 daddr = ip_hdr(skb)->daddr;
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

/* Never answer to SYNs send to broadcast or multicast */
if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
goto drop;

/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
* evidently real one.
*/

//关键函数inet_csk_reqsk_queue_is_full
if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
if (sysctl_tcp_syncookies) {
want_cookie = 1;
} else
#endif
goto drop;
}

/* Accept backlog is full. If we have already queued enough
* of warm entries in syn queue, drop request. It is better than
* clogging syn queue with openreqs with exponentially increasing
* timeout.
*/
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;

req = inet_reqsk_alloc(&tcp_request_sock_ops);
if (!req)
goto drop;
省略N多代码

跟进关键函数inet_csk_reqsk_queue_is_full,在文件include/net/inet_connection_sock.h中。

?View
Code C

/*
 * Report whether the request queue of a connection-oriented socket is full.
 *
 * Looks up the socket's icsk_accept_queue and hands it to
 * reqsk_queue_is_full(); the return value is passed through unchanged
 * (zero means "not full", non-zero means "full").
 */
static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
{
	const struct request_sock_queue *accept_queue =
		&inet_csk(sk)->icsk_accept_queue;

	return reqsk_queue_is_full(accept_queue);
}

跟进关键函数reqsk_queue_is_full,在文件include/net/request_sock.h中。

?View
Code C

/*
 * Tell whether the request queue has reached its capacity.
 *
 * Note that the check is a right shift (>>), not a greater-than
 * comparison: for a non-negative qlen the shifted value is zero while
 * qlen < 2^max_qlen_log and becomes non-zero as soon as qlen reaches
 * 2^max_qlen_log. The raw shifted value is returned, so callers should
 * only test it for zero / non-zero.
 */
static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
{
	const int queued = queue->listen_opt->qlen;
	const int capacity_log2 = queue->listen_opt->max_qlen_log;

	return queued >> capacity_log2;
}

查找qlen和max_qlen_log的定义,在文件include/net/request_sock.h中。

?View
Code C

/** struct listen_sock - listen state
*
* @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs
* @qlen - current number of queued SYNs/REQUESTs (the half-open, SYN_RECV
*         queue); reqsk_queue_is_full() reports "full" once
*         qlen >> max_qlen_log is non-zero. This is NOT the accept
*         (fully established) queue length.
* @hash_rnd - random value filled in by get_random_bytes() in
*         reqsk_queue_alloc()
* @nr_table_entries - number of syn_table slots, as computed by
*         reqsk_queue_alloc()
* @syn_table - trailing array of request_sock pointers; reqsk_queue_alloc()
*         allocates room for nr_table_entries pointers after the struct
*
* qlen_young and clock_hand are not explained by the code shown in this
* article; see the kernel sources for their meaning.
*/
struct listen_sock {
u8			max_qlen_log;	/* 2^max_qlen_log = maximum length of the half-open queue */
/* 3 bytes hole, try to use */
int			qlen;		/* current length of the half-open (SYN_RECV) queue */
int			qlen_young;
int			clock_hand;
u32			hash_rnd;
u32			nr_table_entries;
struct request_sock	*syn_table[0];
};

可见关键是如何计算max_qlen_log,前一篇博客提到了listen的系统调用:

?View
Code C

//file:net/socket.c

/*
 * listen(2) system call entry point.
 *
 * Resolves @fd to a socket, caps @backlog at the per-namespace
 * sysctl_somaxconn, runs the security hook and, if that allows it,
 * forwards to the protocol's ->listen() operation.
 *
 * Returns the error reported by sockfd_lookup_light() when the lookup
 * fails, otherwise the result of the security hook or of ->listen().
 */
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
	struct socket *sock;
	int err, fput_needed;
	int somaxconn;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		return err;

	/*
	 * The backlog never exceeds somaxconn. The comparison is done on
	 * the unsigned value of backlog, so a negative backlog also ends
	 * up clamped (for a non-negative somaxconn).
	 */
	somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
	if ((unsigned)backlog > somaxconn)
		backlog = somaxconn;

	err = security_socket_listen(sock, backlog);
	if (err == 0) {
		/* This is the key step: hand over to the protocol's listen(). */
		err = sock->ops->listen(sock, backlog);
	}

	fput_light(sock->file, fput_needed);
	return err;
}

sock->ops->listen其实是inet_listen,在文件net/ipv4/af_inet.c中。

?View
Code C

/*
 * Move a stream socket into the listening state, or adjust the backlog
 * of a socket that is already listening.
 *
 * Runs under lock_sock(). Returns -EINVAL when the socket is not an
 * unconnected SOCK_STREAM socket or its state is neither CLOSE nor
 * LISTEN, the error from inet_csk_listen_start() when starting to listen
 * fails, and 0 on success.
 */
int inet_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char prev_state;
	int err = -EINVAL;

	lock_sock(sk);

	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
		goto out;

	prev_state = sk->sk_state;
	if (((1 << prev_state) & (TCPF_CLOSE | TCPF_LISTEN)) == 0)
		goto out;

	/*
	 * A socket that is already listening only gets its backlog
	 * adjusted; otherwise the listen state has to be set up first.
	 */
	if (prev_state != TCP_LISTEN) {
		/* Key call: inet_csk_listen_start() receives the backlog. */
		err = inet_csk_listen_start(sk, backlog);
		if (err != 0)
			goto out;
	}

	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

跟进inet_csk_listen_start,在文件net/ipv4/inet_connection_sock.c中。

?View
Code C

/*
 * Excerpt of inet_csk_listen_start(): only the opening of the function is
 * shown. The part reproduced here passes nr_table_entries (the backlog
 * given by inet_listen()) to reqsk_queue_alloc() for the socket's
 * icsk_accept_queue.
 */
int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
{
struct inet_sock *inet = inet_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);

// Key call: reqsk_queue_alloc() sizes the request queue from nr_table_entries.
int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
// The remainder of the function is omitted from this excerpt.
}

跟进reqsk_queue_alloc,在文件net/core/request_sock.c中。

?View
Code C

/*
 * Allocate the listen_sock (with its trailing syn_table) for a request
 * queue and install it as queue->listen_opt.
 *
 * @queue:            request queue to initialise
 * @nr_table_entries: requested table size (the listen backlog); it is
 *                    adjusted below before being used
 *
 * Returns 0 on success or -ENOMEM when the allocation fails.
 */
int reqsk_queue_alloc(struct request_sock_queue *queue,
		      unsigned int nr_table_entries)
{
	struct listen_sock *lopt;
	size_t lopt_size;

	/*
	 * From here on nr_table_entries is rewritten: capped by
	 * sysctl_max_syn_backlog, raised to at least 8, then rounded up to
	 * the next power of two strictly above that value.
	 * (Article annotation: on kernels older than 2.6.20 the value is
	 * reportedly left unmodified.)
	 */
	nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
	nr_table_entries = max_t(u32, nr_table_entries, 8);
	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);

	/* nr_table_entries is final at this point; size header plus table. */
	lopt_size = sizeof(struct listen_sock) +
		    nr_table_entries * sizeof(struct request_sock *);

	/* Zeroed allocation: kzalloc up to one page, __vmalloc beyond. */
	if (lopt_size <= PAGE_SIZE)
		lopt = kzalloc(lopt_size, GFP_KERNEL);
	else
		lopt = __vmalloc(lopt_size,
				 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
				 PAGE_KERNEL);
	if (!lopt)
		return -ENOMEM;

	/*
	 * Determine max_qlen_log: the smallest exponent, starting at 3,
	 * with 2^max_qlen_log >= nr_table_entries.
	 * (Article annotation: on kernels older than 2.6.20 the bound used
	 * here is reportedly sysctl_max_syn_backlog instead.)
	 */
	lopt->max_qlen_log = 3;
	while ((1 << lopt->max_qlen_log) < nr_table_entries)
		lopt->max_qlen_log++;

	lopt->nr_table_entries = nr_table_entries;
	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));

	queue->rskq_accept_head = NULL;
	rwlock_init(&queue->syn_wait_lock);

	/* Publish the table under the SYN wait lock. */
	write_lock_bh(&queue->syn_wait_lock);
	queue->listen_opt = lopt;
	write_unlock_bh(&queue->syn_wait_lock);

	return 0;
}

代码到此为止,然后我们计算一下为何在虚拟机S上的SYN_RECV状态数量会是256

nr_table_entries = listen的第二个参数int backlog ,上限是系统的somaxconn

若 somaxconn = 128,sysctl_max_syn_backlog = 4096,backlog = 511,则 backlog 在listen系统调用中先被 somaxconn 截断为 128,即传入的 nr_table_entries = 128

nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);

取两者较小的一个 nr_table_entries = 128

nr_table_entries = max_t(u32, nr_table_entries, 8);

取两者较大的一个 nr_table_entries = 128

nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); //roundup_pow_of_two - round the given value up to nearest power of two

roundup_pow_of_two(128 + 1) = 256

for (lopt->max_qlen_log = 3; (1 << lopt->max_qlen_log) < nr_table_entries; lopt->max_qlen_log++);

max_qlen_log = 8

判断半连接队列是否满 queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;

queue->listen_opt->qlen = 256 时reqsk_queue_is_full返回1 , 进入drop

也就是说,只有 queue->listen_opt->qlen 取值为 0~255 时新的SYN才会被接受,队列最多容纳 256 个请求,因此SYN_RECV状态数量会是 256

另外同事的测试结果为何与我的不同?

因为内核版本小于2.6.20的话max_qlen_log是直接由sysctl_max_syn_backlog决定的,所以半连接队列的长度就是等于sysctl_max_syn_backlog

文章有点长,不过总算是把问题给解决了。这里要特别感谢雨哥(博客),很多代码是他带着我分析的。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: