Skip to content

Commit 079096f

Browse files
Eric Dumazet authored and davem330 committed
tcp/dccp: install syn_recv requests into ehash table
In this patch, we insert request sockets into TCP/DCCP regular ehash table (where ESTABLISHED and TIMEWAIT sockets are) instead of using the per-listener hash table.

ACK packets find SYN_RECV pseudo sockets without having to find and lock the listener.

In nominal conditions, this halves pressure on listener lock.

Note that this will allow for SO_REUSEPORT refinements, so that we can select a listener using cpu/numa affinities instead of the prior 'consistent hash', since only SYN packets will apply this selection logic.

We will shrink listen_sock in the following patch to ease code review.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ying Cai <ycai@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 2feda34 commit 079096f

15 files changed

+160
-501
lines changed

include/net/inet_connection_sock.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,6 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
258258

259259
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
260260

261-
struct request_sock *inet_csk_search_req(struct sock *sk,
262-
const __be16 rport,
263-
const __be32 raddr,
264-
const __be32 laddr);
265261
int inet_csk_bind_conflict(const struct sock *sk,
266262
const struct inet_bind_bucket *tb, bool relax);
267263
int inet_csk_get_port(struct sock *sk, unsigned short snum);

include/net/inet_hashtables.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ void inet_put_port(struct sock *sk);
205205

206206
void inet_hashinfo_init(struct inet_hashinfo *h);
207207

208+
int inet_ehash_insert(struct sock *sk, struct sock *osk);
208209
void __inet_hash_nolisten(struct sock *sk, struct sock *osk);
209210
void __inet_hash(struct sock *sk, struct sock *osk);
210211
void inet_hash(struct sock *sk);

include/net/request_sock.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -266,8 +266,4 @@ static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
266266
return reqsk_queue_len(queue) >> queue->listen_opt->max_qlen_log;
267267
}
268268

269-
void reqsk_queue_hash_req(struct request_sock_queue *queue,
270-
u32 hash, struct request_sock *req,
271-
unsigned long timeout);
272-
273269
#endif /* _REQUEST_SOCK_H */

include/net/tcp.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1618,7 +1618,6 @@ static inline bool tcp_stream_is_thin(struct tcp_sock *tp)
16181618
/* /proc */
16191619
enum tcp_seq_states {
16201620
TCP_SEQ_STATE_LISTENING,
1621-
TCP_SEQ_STATE_OPENREQ,
16221621
TCP_SEQ_STATE_ESTABLISHED,
16231622
};
16241623

@@ -1717,8 +1716,6 @@ struct tcp_request_sock_ops {
17171716
int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
17181717
struct flowi *fl, struct request_sock *req,
17191718
u16 queue_mapping, struct tcp_fastopen_cookie *foc);
1720-
void (*queue_hash_add)(struct sock *sk, struct request_sock *req,
1721-
const unsigned long timeout);
17221719
};
17231720

17241721
#ifdef CONFIG_SYN_COOKIES

net/core/request_sock.c

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -99,35 +99,9 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk(
9999

100100
void reqsk_queue_destroy(struct request_sock_queue *queue)
101101
{
102-
/* make all the listen_opt local to us */
103102
struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
104103

105-
if (reqsk_queue_len(queue) != 0) {
106-
unsigned int i;
107-
108-
for (i = 0; i < lopt->nr_table_entries; i++) {
109-
struct request_sock *req;
110-
111-
spin_lock_bh(&queue->syn_wait_lock);
112-
while ((req = lopt->syn_table[i]) != NULL) {
113-
lopt->syn_table[i] = req->dl_next;
114-
/* Because of following del_timer_sync(),
115-
* we must release the spinlock here
116-
* or risk a dead lock.
117-
*/
118-
spin_unlock_bh(&queue->syn_wait_lock);
119-
atomic_dec(&queue->qlen);
120-
if (del_timer_sync(&req->rsk_timer))
121-
reqsk_put(req);
122-
reqsk_put(req);
123-
spin_lock_bh(&queue->syn_wait_lock);
124-
}
125-
spin_unlock_bh(&queue->syn_wait_lock);
126-
}
127-
}
128-
129-
if (WARN_ON(reqsk_queue_len(queue) != 0))
130-
pr_err("qlen %u\n", reqsk_queue_len(queue));
104+
/* cleaning is done by req timers */
131105
kvfree(lopt);
132106
}
133107

net/dccp/ipv4.c

Lines changed: 21 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -444,36 +444,6 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
444444
}
445445
EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
446446

447-
static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
448-
{
449-
const struct dccp_hdr *dh = dccp_hdr(skb);
450-
const struct iphdr *iph = ip_hdr(skb);
451-
struct sock *nsk;
452-
/* Find possible connection requests. */
453-
struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport,
454-
iph->saddr, iph->daddr);
455-
if (req) {
456-
nsk = dccp_check_req(sk, skb, req);
457-
if (!nsk)
458-
reqsk_put(req);
459-
return nsk;
460-
}
461-
nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo,
462-
iph->saddr, dh->dccph_sport,
463-
iph->daddr, dh->dccph_dport,
464-
inet_iif(skb));
465-
if (nsk != NULL) {
466-
if (nsk->sk_state != DCCP_TIME_WAIT) {
467-
bh_lock_sock(nsk);
468-
return nsk;
469-
}
470-
inet_twsk_put(inet_twsk(nsk));
471-
return NULL;
472-
}
473-
474-
return sk;
475-
}
476-
477447
static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
478448
struct sk_buff *skb)
479449
{
@@ -705,26 +675,13 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
705675
* NOTE: the check for the packet types is done in
706676
* dccp_rcv_state_process
707677
*/
708-
if (sk->sk_state == DCCP_LISTEN) {
709-
struct sock *nsk = dccp_v4_hnd_req(sk, skb);
710-
711-
if (nsk == NULL)
712-
goto discard;
713-
714-
if (nsk != sk) {
715-
if (dccp_child_process(sk, nsk, skb))
716-
goto reset;
717-
return 0;
718-
}
719-
}
720678

721679
if (dccp_rcv_state_process(sk, skb, dh, skb->len))
722680
goto reset;
723681
return 0;
724682

725683
reset:
726684
dccp_v4_ctl_send_reset(sk, skb);
727-
discard:
728685
kfree_skb(skb);
729686
return 0;
730687
}
@@ -868,6 +825,27 @@ static int dccp_v4_rcv(struct sk_buff *skb)
868825
goto no_dccp_socket;
869826
}
870827

828+
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
829+
struct request_sock *req = inet_reqsk(sk);
830+
struct sock *nsk = NULL;
831+
832+
sk = req->rsk_listener;
833+
if (sk->sk_state == DCCP_LISTEN)
834+
nsk = dccp_check_req(sk, skb, req);
835+
if (!nsk) {
836+
reqsk_put(req);
837+
goto discard_it;
838+
}
839+
if (nsk == sk) {
840+
sock_hold(sk);
841+
reqsk_put(req);
842+
} else if (dccp_child_process(sk, nsk, skb)) {
843+
dccp_v4_ctl_send_reset(sk, skb);
844+
goto discard_it;
845+
} else {
846+
return 0;
847+
}
848+
}
871849
/*
872850
* RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
873851
* o if MinCsCov = 0, only packets with CsCov = 0 are accepted

net/dccp/ipv6.c

Lines changed: 22 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -290,37 +290,6 @@ static struct request_sock_ops dccp6_request_sock_ops = {
290290
.syn_ack_timeout = dccp_syn_ack_timeout,
291291
};
292292

293-
static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
294-
{
295-
const struct dccp_hdr *dh = dccp_hdr(skb);
296-
const struct ipv6hdr *iph = ipv6_hdr(skb);
297-
struct request_sock *req;
298-
struct sock *nsk;
299-
300-
req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr,
301-
&iph->daddr, inet6_iif(skb));
302-
if (req) {
303-
nsk = dccp_check_req(sk, skb, req);
304-
if (!nsk)
305-
reqsk_put(req);
306-
return nsk;
307-
}
308-
nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo,
309-
&iph->saddr, dh->dccph_sport,
310-
&iph->daddr, ntohs(dh->dccph_dport),
311-
inet6_iif(skb));
312-
if (nsk != NULL) {
313-
if (nsk->sk_state != DCCP_TIME_WAIT) {
314-
bh_lock_sock(nsk);
315-
return nsk;
316-
}
317-
inet_twsk_put(inet_twsk(nsk));
318-
return NULL;
319-
}
320-
321-
return sk;
322-
}
323-
324293
static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
325294
{
326295
struct request_sock *req;
@@ -398,7 +367,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
398367
if (dccp_v6_send_response(sk, req))
399368
goto drop_and_free;
400369

401-
inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
370+
inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
402371
return 0;
403372

404373
drop_and_free:
@@ -641,24 +610,6 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
641610
* NOTE: the check for the packet types is done in
642611
* dccp_rcv_state_process
643612
*/
644-
if (sk->sk_state == DCCP_LISTEN) {
645-
struct sock *nsk = dccp_v6_hnd_req(sk, skb);
646-
647-
if (nsk == NULL)
648-
goto discard;
649-
/*
650-
* Queue it on the new socket if the new socket is active,
651-
* otherwise we just shortcircuit this and continue with
652-
* the new socket..
653-
*/
654-
if (nsk != sk) {
655-
if (dccp_child_process(sk, nsk, skb))
656-
goto reset;
657-
if (opt_skb != NULL)
658-
__kfree_skb(opt_skb);
659-
return 0;
660-
}
661-
}
662613

663614
if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
664615
goto reset;
@@ -732,6 +683,27 @@ static int dccp_v6_rcv(struct sk_buff *skb)
732683
goto no_dccp_socket;
733684
}
734685

686+
if (sk->sk_state == DCCP_NEW_SYN_RECV) {
687+
struct request_sock *req = inet_reqsk(sk);
688+
struct sock *nsk = NULL;
689+
690+
sk = req->rsk_listener;
691+
if (sk->sk_state == DCCP_LISTEN)
692+
nsk = dccp_check_req(sk, skb, req);
693+
if (!nsk) {
694+
reqsk_put(req);
695+
goto discard_it;
696+
}
697+
if (nsk == sk) {
698+
sock_hold(sk);
699+
reqsk_put(req);
700+
} else if (dccp_child_process(sk, nsk, skb)) {
701+
dccp_v6_ctl_send_reset(sk, skb);
702+
goto discard_it;
703+
} else {
704+
return 0;
705+
}
706+
}
735707
/*
736708
* RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
737709
* o if MinCsCov = 0, only packets with CsCov = 0 are accepted

0 commit comments

Comments
 (0)