大龙的博客

常用链接

统计

最新评论

关于半连接队列的释疑 --- 转

1、到底哪个是半连接队列
/** struct listen_sock - listen state
*
* @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs
*
* Per-listener bookkeeping for pending connection requests: the counters
* below plus the syn_table hash table that holds the request_socks.
*/
struct listen_sock {
u8 max_qlen_log; /* 2^max_qlen_log is the limit on queued SYNs/requests (e.g. 10 gives 2^10 = 1024) */
/* 3 bytes hole, try to use */
int qlen; /* current number of queued requests; incremented in reqsk_queue_added() */
int qlen_young; /* incremented together with qlen in reqsk_queue_added() */
int clock_hand;
u32 hash_rnd; /* passed to inet_synq_hash() when picking a syn_table bucket */
u32 nr_table_entries; /* number of buckets in syn_table; 512 (TCP_SYNQ_HSIZE) when set up via inet_listen() */
struct request_sock *syn_table[0]; /* zero-length trailing array; reqsk_queue_alloc() allocates the buckets right after the struct */
};
里面有几个关键的成员变量:max_qlen_log、qlen和syn_table。注意syn_table是一个零数组。

跟踪listen系统调用:
inet_listen
inet_csk_listen_start
reqsk_queue_alloc

在reqsk_queue_alloc中:
const int lopt_size = sizeof(struct listen_sock) +
nr_table_entries * sizeof(struct request_sock *);
struct listen_sock *lopt = kzalloc(lopt_size, GFP_KERNEL);
我们发现这里进行了分配内存,分配了nr_table_entries个struct request_sock *。
对于nr_table_entries,我们可以往回追踪:
err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
#define TCP_SYNQ_HSIZE 512 /* Size of SYNACK hash table */

跟踪SYN数据包的处理,在tcp_v4_conn_request中,最后调用了inet_csk_reqsk_queue_hash_add函数:
/*
 * Insert a new connection request into the listener's syn_table and
 * account for it.
 *
 * @sk:      listening socket
 * @req:     request_sock to insert
 * @timeout: passed on as the request's expiry delta and as the timer delay
 */
void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
				   unsigned long timeout)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *listen_opt = icsk->icsk_accept_queue.listen_opt;
	u32 bucket;

	/* Bucket is derived from the remote address/port and the per-listener seed. */
	bucket = inet_synq_hash(inet_rsk(req)->rmt_addr,
				inet_rsk(req)->rmt_port,
				listen_opt->hash_rnd,
				listen_opt->nr_table_entries);

	reqsk_queue_hash_req(&icsk->icsk_accept_queue, bucket, req, timeout);
	inet_csk_reqsk_queue_added(sk, timeout);
}

reqsk_queue_hash_req将新建的request_sock添加到reqsk_queue中:
/*
 * Initialise @req and push it onto the head of bucket @hash of the
 * queue's syn_table.
 *
 * @queue:   request queue whose listen_opt holds the syn_table
 * @hash:    index of the syn_table bucket to insert into
 * @req:     request_sock being queued
 * @timeout: added to the current jiffies to form the request's expiry time
 */
static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
u32 hash, struct request_sock *req,
unsigned long timeout)
{
struct listen_sock *lopt = queue->listen_opt;

req->expires = jiffies + timeout;
req->retrans = 0;
/* No child socket is attached to the request yet. */
req->sk = NULL;
/* Link the new request in front of the current chain of this bucket. */
req->dl_next = lopt->syn_table[hash];

/*
 * Only the store of the new bucket head is done under syn_wait_lock.
 * NOTE(review): the read of the old head above is outside the lock, so
 * writers are presumably serialised by the caller - confirm.
 */
write_lock(&queue->syn_wait_lock);
lopt->syn_table[hash] = req;
write_unlock(&queue->syn_wait_lock);
}

inet_csk_reqsk_queue_added增加连接请求队列的计数,必要时(队列原先为空)设置定时器:
/*
 * Account for a newly queued connection request on listening socket @sk.
 * If the queue held no requests before this one, (re)arm the keepalive
 * timer with @timeout.
 */
static inline void inet_csk_reqsk_queue_added(struct sock *sk,
					      const unsigned long timeout)
{
	const int prev_qlen = reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue);

	if (prev_qlen != 0)
		return;

	inet_csk_reset_keepalive_timer(sk, timeout);
}
/*
 * Bump the request counters (qlen and qlen_young) of @queue.
 *
 * Returns the value qlen had before the increment.
 */
static inline int reqsk_queue_added(struct request_sock_queue *queue)
{
	struct listen_sock *lopt = queue->listen_opt;

	lopt->qlen_young++;
	/* Post-increment yields the previous length as the return value. */
	return lopt->qlen++;
}

其他的几个数据结构:
/*
 * Connection-oriented socket state layered on top of struct inet_sock.
 * For a listening socket, icsk_accept_queue is the member that holds the
 * pending connection requests.
 */
struct inet_connection_sock {
/* inet_sock has to be the first member! */
struct inet_sock icsk_inet;
/* Request queue: accept list head/tail plus the listen_opt (syn_table) pointer */
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
unsigned long icsk_timeout;
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
__u32 icsk_rto;
__u32 icsk_pmtu_cookie;
const struct tcp_congestion_ops *icsk_ca_ops;
/* Address-family operations; tcp_check_req() calls ->syn_recv_sock() through this */
const struct inet_connection_sock_af_ops *icsk_af_ops;
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
__u8 icsk_ca_state;
__u8 icsk_retransmits;
__u8 icsk_pending;
__u8 icsk_backoff;
__u8 icsk_syn_retries;
__u8 icsk_probes_out;
__u16 icsk_ext_hdr_len;
struct {
__u8 pending; /* ACK is pending */
__u8 quick; /* Scheduled number of quick acks */
__u8 pingpong; /* The session is interactive */
__u8 blocked; /* Delayed ACK was blocked by socket lock */
__u32 ato; /* Predicted tick of soft clock */
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
__u16 rcv_mss; /* MSS used for delayed ACK decisions */
} icsk_ack;
struct {
int enabled;

/* Range of MTUs to search */
int search_high;
int search_low;

/* Information on the current probe. */
int probe_size;
} icsk_mtup;
u32 icsk_ca_priv[16];
/* Size in bytes of the icsk_ca_priv array above */
#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32))
};

/*
 * Per-listener request queue: a singly linked accept list (head/tail,
 * chained through request_sock->dl_next) plus the listen_opt that holds
 * the syn_table of still-pending requests.
 */
struct request_sock_queue {
/* Head of the accept list. After the three-way handshake completes, the request_sock is moved from syn_table to this list. */
struct request_sock *rskq_accept_head;
/* Tail of the accept list; new entries are appended here */
struct request_sock *rskq_accept_tail;
/* Taken for writing when a syn_table bucket head is updated */
rwlock_t syn_wait_lock;
u8 rskq_defer_accept;
/* 3 bytes hole, try to pack */
struct listen_sock *listen_opt;
};

因此,半连接队列在这里可以认为是icsk_accept_queue,叫做连接请求队列。

2、半连接队列的长度
跟踪inet_csk_reqsk_queue_is_full,发现它返回queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log,也就是说当qlen达到2^max_qlen_log时结果非零(队列已满),看来关键在于max_qlen_log。
发现reqsk_queue_alloc中:
for (lopt->max_qlen_log = 6; /*64*/
(1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
lopt->max_qlen_log++);

我们可以在/proc/sys/net/ipv4/tcp_max_syn_backlog中设置max_syn_backlog,这个就是我们可以设置的半连接队列的长度。
默认是1024,那么max_qlen_log就是10了;假如我们设置成64,那么max_qlen_log就是6了,我们设置成128,就是7了;其他的依次类推。

3、连接请求的数据流向
在前面的分析中,SYN数据包的处理中,接收到SYN数据包,将会建立一个request_sock结构,添加到syn_table哈希表相应的表中。
接收到ACK数据包后,跟踪tcp_v4_do_rcv,发现会调用tcp_v4_hnd_req。
在tcp_v4_hnd_req中:
/* Check whether the request_sock is in the syn_table.
If the request_sock is already in the syn_table, call tcp_check_req(). */
/* For the ACK of the three-way handshake, a request_sock will be found in syn_table, so tcp_check_req() is called. */
struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
iph->saddr, iph->daddr);
/*Normal: Call syn_recv_sock function(tcp_v4_syn_recv_sock)*/
if (req)
return tcp_check_req(sk, skb, req, prev);

在tcp_check_req中:
/*ipv4_specific.syn_recv_sock = tcp_v4_syn_recv_sock*/
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
req, NULL);
if (child == NULL)
goto listen_overflow;

/* Move the request_sock from the syn_table to the accept queue.
Note: syn_table isn't an hlist_head structure. */
inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req);

inet_csk_reqsk_queue_add(sk, req, child);
return child;

tcp_v4_syn_recv_sock会根据request_sock新建一个sock结构,并且进行一定的初始化,返回新建的sock结构。
将request_sock从syn_table中移到accept_queue中。

static inline void inet_csk_reqsk_queue_add(struct sock *sk,
struct request_sock *req,
struct sock *child)
{
reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
}
/*
 * Append @req to the tail of the accept list of @queue.
 *
 * @queue:  request queue of the listening socket
 * @req:    request_sock to append
 * @parent: listening socket whose accept backlog counter is incremented
 * @child:  socket created for this connection; stored in req->sk
 */
static inline void reqsk_queue_add(struct request_sock_queue *queue,
				   struct request_sock *req,
				   struct sock *parent,
				   struct sock *child)
{
	req->sk = child;
	/*
	 * Count one more connection on the parent's accept backlog: the
	 * child socket exists but has not yet been taken by accept().
	 */
	sk_acceptq_added(parent);

	if (queue->rskq_accept_head == NULL)
		queue->rskq_accept_head = req;
	else
		queue->rskq_accept_tail->dl_next = req;

	/* The new request is always the last element of the list. */
	queue->rskq_accept_tail = req;
	req->dl_next = NULL;
}

4、accept系统调用的处理

三次握手之后,request_sock已经在rskq_accept队列中了,等待accept系统调用取走。
/*
 * Decrement the accept backlog counter of listening socket @sk; called
 * when a queued connection is taken off the accept queue.
 */
static inline void sk_acceptq_removed(struct sock *sk)
{
	sk->sk_ack_backlog--;
}

/*
 * Increment the accept backlog counter of listening socket @sk; called
 * when a connection is placed on the accept queue.
 */
static inline void sk_acceptq_added(struct sock *sk)
{
	sk->sk_ack_backlog += 1;
}
这个时候,我们关注一个struct sock中的两个变量:
unsigned short sk_ack_backlog; /* number of connections that have completed the three-way handshake but have not yet been taken by accept() */
unsigned short sk_max_ack_backlog; /* upper bound for sk_ack_backlog; assigned from the backlog argument in inet_listen() */
其中,sk_ack_backlog是已经完成了三次握手,但是还没有被accept系统调用处理的连接请求数量;sk_max_ack_backlog就是我们经常熟悉的listen的参数。

跟踪accept系统调用:
inet_csk_accept:
newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);

/*
 * Take the next request off the accept list of @queue and hand back its
 * child socket.
 *
 * The accept backlog counter of @parent is decremented and the
 * request_sock itself is freed; only the child socket is returned.
 */
static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue,
						 struct sock *parent)
{
	struct request_sock *accepted = reqsk_queue_remove(queue);
	struct sock *newsk = accepted->sk;

	BUG_TRAP(newsk != NULL);

	sk_acceptq_removed(parent);
	/* The request_sock is no longer needed once its child has been extracted. */
	__reqsk_free(accepted);

	return newsk;
}

注意这里free掉了在三次握手中建立的request_sock结构。

5、防止溢出的两个链表检查
在tcp_v4_conn_request中,对SYN包的处理过程中:

if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
if (sysctl_tcp_syncookies) {
want_cookie = 1;
} else
#endif
goto drop;
}

/* Accept backlog is full. If we have already queued enough
* of warm entries in syn queue, drop request. It is better than
* clogging syn queue with openreqs with exponentially increasing
* timeout.
*/
/*If Accept Queue is full, Drop the packet*/
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;

这里面有两个队列的检查:request_sock队列和accept队列。
request_sock队列:
/*
 * Report whether the request queue of listening socket @sk is full.
 * Returns non-zero when full, 0 otherwise (see reqsk_queue_is_full()).
 */
static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
{
	const struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;

	return reqsk_queue_is_full(queue);
}
static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
{
return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
}

accept队列:
/*
 * Returns 1 when the accept backlog of @sk exceeds its configured
 * maximum (strictly greater than), 0 otherwise.
 */
static inline int sk_acceptq_is_full(struct sock *sk)
{
	return sk->sk_max_ack_backlog < sk->sk_ack_backlog;
}

其中关系到4个变量,其中两个是sock的成员变量,两个是request_sock_queue中listen_opt的变量。

max_qlen_log的初始化:
在reqsk_queue_alloc中:
for (lopt->max_qlen_log = 6; /*64*/
(1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
lopt->max_qlen_log++);

sk_max_ack_backlog的初始化:
在inet_listen中:
sk->sk_max_ack_backlog = backlog;
注:sk_max_ack_backlog就是我们经常熟悉的listen的参数。

qlen的增加:
tcp_v4_conn_request
inet_csk_reqsk_queue_hash_add
inet_csk_reqsk_queue_added
reqsk_queue_added

注:跟踪SYN数据包的处理,在tcp_v4_conn_request中,最后调用了inet_csk_reqsk_queue_hash_add函数:
inet_csk_reqsk_queue_added(sk, timeout);
inet_csk_reqsk_queue_added增加连接请求队列的计数,必要时设置定时器。
reqsk_queue_added:
lopt->qlen++;

qlen的减少:
tcp_v4_hnd_req
tcp_check_req
inet_csk_reqsk_queue_removed
reqsk_queue_removed

注:
在inet_csk_listen_stop中:
/* Following specs, it would be better either to send FIN
* (and enter FIN-WAIT-1, it is normal close)
* or to send active reset (abort).
* Certainly, it is pretty dangerous while synflood, but it is
* bad justification for our negligence 8)
* To be honest, we are not able to make either
* of the variants now. --ANK
*/
reqsk_queue_destroy(&icsk->icsk_accept_queue);

sk_ack_backlog的增加:
tcp_check_req
inet_csk_reqsk_queue_add
reqsk_queue_add
sk_acceptq_added

sk_ack_backlog的减少:
inet_csk_accept
reqsk_queue_get_child
sk_acceptq_removed

posted on 2013-02-16 00:04 大龙 阅读(893) 评论(0)  编辑 收藏 引用


只有注册用户登录后才能发表评论。
网站导航: 博客园   IT新闻   BlogJava   知识库   博问   管理