标签:top rcv eve cap rect flags reac btree ready
TCP握手分为三个阶段,在握手开始之前,通信双方的套接字状态均为“TCP_CLOSE”,以下是这三个阶段:
(1)客户端发送一个标志位中SYN位为1的报文给服务端,并设套接字状态为“TCP_SYN_SENT”
(2)服务端接到SYN报文,设套接字状态为“TCP_SYN_RECV”,并回送一个SYN+ACK位均为1的报文
(3)客户端接到SYN+ACK报文,回送一个ACK位为1的报文,设套接字状态为“TCP_ESTABLISHED”,服务端接到ACK报文后,同样设置为“TCP_ESTABLISHED”
第一阶段客户端通过调用connect函数完成,connect实际上调用了内核中的__sys_connect函数。
以下代码是文件net/socket.c中connect系统调用的定义,由此可以看出,__sys_connect函数就是connect在内核中的实现。
SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, int, addrlen) { return __sys_connect(fd, uservaddr, addrlen); }
从__sys_connect函数开始进入三次握手的第一阶段,以下是部分代码:
int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) {
... sock = sockfd_lookup_light(fd, &err, &fput_needed); ... err = move_addr_to_kernel(uservaddr, addrlen, &address);
... err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, sock->file->f_flags); ... }
对于TCP套接字,代码中的sock->ops->connect指向inet_stream_connect函数,它经由__inet_stream_connect调用sk->sk_prot->connect,最终执行的即是tcp_v4_connect函数,现在转到tcp_v4_connect函数:
1 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 2 { 3 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 4 struct inet_sock *inet = inet_sk(sk); 5 struct tcp_sock *tp = tcp_sk(sk); 6 __be16 orig_sport, orig_dport; 7 __be32 daddr, nexthop; 8 struct flowi4 *fl4; 9 struct rtable *rt; 10 int err; 11 struct ip_options_rcu *inet_opt; 12 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; 13 14 if (addr_len < sizeof(struct sockaddr_in)) 15 return -EINVAL; 16 17 if (usin->sin_family != AF_INET) 18 return -EAFNOSUPPORT; 19 20 nexthop = daddr = usin->sin_addr.s_addr; 21 inet_opt = rcu_dereference_protected(inet->inet_opt, 22 lockdep_sock_is_held(sk)); 23 if (inet_opt && inet_opt->opt.srr) { 24 if (!daddr) 25 return -EINVAL; 26 nexthop = inet_opt->opt.faddr; 27 } 28 29 orig_sport = inet->inet_sport; 30 orig_dport = usin->sin_port; 31 fl4 = &inet->cork.fl.u.ip4; 32 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, 33 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 34 IPPROTO_TCP, 35 orig_sport, orig_dport, sk); 36 if (IS_ERR(rt)) { 37 err = PTR_ERR(rt); 38 if (err == -ENETUNREACH) 39 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 40 return err; 41 } 42 43 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 44 ip_rt_put(rt); 45 return -ENETUNREACH; 46 } 47 48 if (!inet_opt || !inet_opt->opt.srr) 49 daddr = fl4->daddr; 50 51 if (!inet->inet_saddr) 52 inet->inet_saddr = fl4->saddr; 53 sk_rcv_saddr_set(sk, inet->inet_saddr); 54 55 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 56 /* Reset inherited state */ 57 tp->rx_opt.ts_recent = 0; 58 tp->rx_opt.ts_recent_stamp = 0; 59 if (likely(!tp->repair)) 60 tp->write_seq = 0; 61 } 62 63 inet->inet_dport = usin->sin_port; 64 sk_daddr_set(sk, daddr); 65 66 inet_csk(sk)->icsk_ext_hdr_len = 0; 67 if (inet_opt) 68 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 69 70 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 71 72 /* Socket identity is still unknown 
(sport may be zero). 73 * However we set state to SYN-SENT and not releasing socket 74 * lock select source port, enter ourselves into the hash tables and 75 * complete initialization after this. 76 */ 77 tcp_set_state(sk, TCP_SYN_SENT); 78 err = inet_hash_connect(tcp_death_row, sk); 79 if (err) 80 goto failure; 81 82 sk_set_txhash(sk); 83 84 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, 85 inet->inet_sport, inet->inet_dport, sk); 86 if (IS_ERR(rt)) { 87 err = PTR_ERR(rt); 88 rt = NULL; 89 goto failure; 90 } 91 /* OK, now commit destination to socket. */ 92 sk->sk_gso_type = SKB_GSO_TCPV4; 93 sk_setup_caps(sk, &rt->dst); 94 rt = NULL; 95 96 if (likely(!tp->repair)) { 97 if (!tp->write_seq) 98 tp->write_seq = secure_tcp_seq(inet->inet_saddr, 99 inet->inet_daddr, 100 inet->inet_sport, 101 usin->sin_port); 102 tp->tsoffset = secure_tcp_ts_off(sock_net(sk), 103 inet->inet_saddr, 104 inet->inet_daddr); 105 } 106 107 inet->inet_id = tp->write_seq ^ jiffies; 108 109 if (tcp_fastopen_defer_connect(sk, &err)) 110 return err; 111 if (err) 112 goto failure; 113 114 err = tcp_connect(sk); 115 116 if (err) 117 goto failure; 118 119 return 0; 120 121 failure: 122 /* 123 * This unhashes the socket and releases the local port, 124 * if necessary. 125 */ 126 tcp_set_state(sk, TCP_CLOSE); 127 ip_rt_put(rt); 128 sk->sk_route_caps = 0; 129 inet->inet_dport = 0; 130 return err; 131 }
在tcp_v4_connect函数中为套接字填充一些变量,将套接字的状态修改为“TCP_SYN_SENT”,然后进入tcp_connect函数。
1 int tcp_connect(struct sock *sk) 2 { 3 struct tcp_sock *tp = tcp_sk(sk); 4 struct sk_buff *buff; 5 int err; 6 7 tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL); 8 9 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) 10 return -EHOSTUNREACH; /* Routing failure or similar. */ 11 12 tcp_connect_init(sk); 13 14 if (unlikely(tp->repair)) { 15 tcp_finish_connect(sk, NULL); 16 return 0; 17 } 18 19 buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true); 20 if (unlikely(!buff)) 21 return -ENOBUFS; 22 23 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); 24 tcp_mstamp_refresh(tp); 25 tp->retrans_stamp = tcp_time_stamp(tp); 26 tcp_connect_queue_skb(sk, buff); 27 tcp_ecn_send_syn(sk, buff); 28 tcp_rbtree_insert(&sk->tcp_rtx_queue, buff); 29 30 /* Send off SYN; include data in Fast Open. */ 31 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : 32 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); 33 39 ...47 48 /* Timer for repeating the SYN until an answer. */ 49 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 50 inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 51 return 0; 52 }
通过调用tcp_transmit_skb函数构造SYN报文并发送出去,并设立一个重传定时器(ICSK_TIME_RETRANS),用于在收到应答之前超时重发SYN报文。
这一阶段函数的调用栈:
__sys_connect -> inet_stream_connect -> __inet_stream_connect -> tcp_v4_connect -> tcp_connect -> tcp_transmit_skb
第二阶段从服务端通过tcp_v4_rcv函数从IP层接收到SYN报文开始,以下是tcp_v4_rcv的部分代码:
1 int tcp_v4_rcv(struct sk_buff *skb) 2 { 3 ... 4 5 if (sk->sk_state == TCP_LISTEN) { 6 ret = tcp_v4_do_rcv(sk, skb); 7 goto put_and_return; 8 } 9 10 ... 11 12 put_and_return: 13 if (refcounted) 14 sock_put(sk); 15 16 return ret; 17 ... 18 }
由于当前套接字状态为“TCP_LISTEN”,进入tcp_v4_do_rcv函数执行
1 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 2 { 3 ... 4 5 if (sk->sk_state == TCP_LISTEN) { 6 7 if (tcp_rcv_state_process(sk, skb)) { 8 rsk = sk; 9 goto reset; 10 } 11 return 0; 12 ... 13 }
tcp_rcv_state_process函数专门用来处理套接字状态的转换,先贴出一张状态转换图:
1 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) 2 { 3 ... 4 5 switch (sk->sk_state) { 6 case TCP_LISTEN: 7 if (th->ack) 8 return 1; 9 10 if (th->rst) 11 goto discard; 12 13 if (th->syn) { 14 if (th->fin) 15 goto discard; 16 /* It is possible that we process SYN packets from backlog, 17 * so we need to make sure to disable BH and RCU right there. 18 */ 19 rcu_read_lock(); 20 local_bh_disable(); 21 acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; 22 local_bh_enable(); 23 rcu_read_unlock(); 24 25 if (!acceptable) 26 return 1; 27 consume_skb(skb); 28 return 0; 29 } 30 goto discard; 31 ... 32 }
这是tcp_rcv_state_process在“TCP_LISTEN”状态下执行的代码,核心在于21行的icsk->icsk_af_ops->conn_request,在此处依次执行tcp_v4_conn_request、tcp_conn_request。
以下是tcp_conn_request的部分代码:
1 if (fastopen_sk) { 2 af_ops->send_synack(fastopen_sk, dst, &fl, req, 3 &foc, TCP_SYNACK_FASTOPEN); 4 /* Add the child socket directly into the accept queue */ 5 inet_csk_reqsk_queue_add(sk, req, fastopen_sk); 6 sk->sk_data_ready(sk); 7 bh_unlock_sock(fastopen_sk); 8 sock_put(fastopen_sk); 9 } else { 10 tcp_rsk(req)->tfo_listener = false; 11 if (!want_cookie) 12 inet_csk_reqsk_queue_hash_add(sk, req, 13 tcp_timeout_init((struct sock *)req)); 14 af_ops->send_synack(sk, dst, &fl, req, &foc, 15 !want_cookie ? TCP_SYNACK_NORMAL : 16 TCP_SYNACK_COOKIE); 17 if (want_cookie) { 18 reqsk_free(req); 19 return 0; 20 } 21 }
主要执行了send_synack函数,send_synack函数用于将SYN+ACK报文发送出去。
这一阶段函数的调用栈:
tcp_v4_rcv -> tcp_v4_do_rcv -> tcp_rcv_state_process -> tcp_v4_conn_request -> tcp_conn_request -> tcp_v4_send_synack
第三阶段同上一阶段一样,客户端从IP层接收到SYN+ACK报文后一路执行tcp_v4_rcv, tcp_v4_do_rcv,进入tcp_rcv_state_process函数:
1 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, 2 const struct tcphdr *th, unsigned int len) 3 { 4 ... 5 switch (sk->sk_state) { 6 case TCP_SYN_SENT: 7 //进入到synack报文的处理流程 8 queued = tcp_rcv_synsent_state_process(sk, skb, th, len); 9 if (queued >= 0) 10 return queued; 11 12 /* Do step6 onward by hand. */ 13 tcp_urg(sk, skb, th); 14 __kfree_skb(skb); 15 tcp_data_snd_check(sk); 16 return 0; 17 } 18 ... 19 }
在tcp_rcv_synsent_state_process函数中又调用了tcp_finish_connect函数,并在其返回后发送ACK,这一过程做了三件事:
(1)tcp_finish_connect将套接字状态设置为"TCP_ESTABLISHED"
(2)tcp_finish_connect返回后,tcp_rcv_synsent_state_process调用tcp_send_ack函数发送一个ACK包(不需要延迟确认时)
(3)tcp_finish_connect初始化一些参数
tcp_send_ack函数又调用tcp_transmit_skb将ACK报文从网络上发出去。
最后是服务端接收到ACK报文,依次执行tcp_v4_rcv,tcp_v4_do_rcv,tcp_rcv_state_process函数,将套接字的状态设置为"TCP_ESTABLISHED",至此,三次握手过程结束。
这一阶段函数的调用栈:
tcp_v4_rcv -> tcp_v4_do_rcv -> tcp_rcv_state_process -> tcp_rcv_synsent_state_process -> tcp_send_ack -> tcp_transmit_skb
tcp_v4_rcv -> tcp_rcv_state_process
标签:top rcv eve cap rect flags reac btree ready
原文地址:https://www.cnblogs.com/hujisha/p/12104441.html