
The Three TCP Receive Queues


I had never really worked out how the three queues in the TCP receive path relate to each other: the prequeue, the backlog, and sk_receive_queue.

Over the past few days, with the help of my colleague Qiu, I finally sorted the relationship out, so I am writing it down as a note here. (The relevant socket fields are sketched just below.)
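As a point of reference, all three queues hang off the socket itself. The sketch below is my own trimmed and annotated view of the relevant fields of struct sock and struct tcp_sock from roughly this kernel generation; it is not the literal kernel layout, and many members are omitted or reordered.

/* Simplified sketch, not the real kernel definitions. */
struct sock {
        struct sk_buff_head sk_receive_queue;   /* in-order data, ready for tcp_recvmsg()          */
        struct {
                struct sk_buff *head;           /* backlog: segments that arrived while a process  */
                struct sk_buff *tail;           /* held the socket lock (lock_sock())              */
                int             len;
        } sk_backlog;
};

struct tcp_sock {
        struct {
                struct task_struct  *task;      /* reader currently blocked in tcp_recvmsg()       */
                struct iovec        *iov;       /* that reader's user buffer                       */
                struct sk_buff_head  prequeue;  /* prequeue: defer processing to that reader       */
                int                  memory;    /* bytes currently parked on the prequeue          */
                int                  len;
        } ucopy;
};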

1. Queuing packets in the softirq: tcp_v4_rcv()

1615 int tcp_v4_rcv(struct sk_buff *skb)
1616 {
1617         const struct iphdr *iph;
1618         struct tcphdr *th;
1619         struct sock *sk;
1620         int ret;
1621         struct net *net = dev_net(skb->dev);
1622 
1623         if (skb->pkt_type != PACKET_HOST)
1624                 goto discard_it;
1625 
1626         /* Count it even if it's bad */
1627         TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1628 
1629         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1630                 goto discard_it;
1631 
1632         th = tcp_hdr(skb);
1633 
1634         if (th->doff < sizeof(struct tcphdr) / 4)
1635                 goto bad_packet;
1636         if (!pskb_may_pull(skb, th->doff * 4))
1637                 goto discard_it;
1638 
1639         /* An explanation is required here, I think.
1640          * Packet length and doff are validated by header prediction,
1641          * provided case of th->doff==0 is eliminated.
1642          * So, we defer the checks. */
1643         if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1644                 goto bad_packet;
1645 
1646         th = tcp_hdr(skb);
1647         iph = ip_hdr(skb);
1648         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1649         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1650                                     skb->len - th->doff * 4);
1651         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1652         TCP_SKB_CB(skb)->when    = 0;
1653         TCP_SKB_CB(skb)->flags   = iph->tos;
1654         TCP_SKB_CB(skb)->sacked  = 0;
1655 
1656         sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1657         if (!sk)
1658                 goto no_tcp_socket;
1659 
1660 process:
1661         if (sk->sk_state == TCP_TIME_WAIT)
1662                 goto do_time_wait;
1663 
1664         if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1665                 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1666                 goto discard_and_relse;
1667         }
1668 
1669         if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1670                 goto discard_and_relse;
1671         nf_reset(skb);
1672 
1673         if (sk_filter(sk, skb))
1674                 goto discard_and_relse;
1675 
1676         skb->dev = NULL;
1677 
1678         bh_lock_sock_nested(sk);
1679         ret = 0;
1680         if (!sock_owned_by_user(sk)) {
1681 #ifdef CONFIG_NET_DMA
1682                 struct tcp_sock *tp = tcp_sk(sk);
1683                 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1684                         tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1685                 if (tp->ucopy.dma_chan)
1686                         ret = tcp_v4_do_rcv(sk, skb);
1687                 else
1688 #endif
1689                 {
1690                         if (!tcp_prequeue(sk, skb))   // try the prequeue first (see the tcp_prequeue() sketch after this listing)
1691                                 ret = tcp_v4_do_rcv(sk, skb);  // otherwise process it right away; in-order data lands on sk_receive_queue
1692                 }
1693         } else if (unlikely(sk_add_backlog(sk, skb))) {  // socket owned by a process: append to the backlog (non-zero means it was full, so drop)
1694                 bh_unlock_sock(sk);
1695                 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1696                 goto discard_and_relse;
1697         }
1698         bh_unlock_sock(sk);
1699 
1700         sock_put(sk);
1701 
1702         return ret;
1703 
1704 no_tcp_socket:
1705         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1706                 goto discard_it;
1707 
1708         if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1709 bad_packet:
1710                 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1711         } else {
1712                 tcp_v4_send_reset(NULL, skb);
1713         }
1714 
1715 discard_it:
1716         /* Discard frame. */
1717         kfree_skb(skb);
1718         return 0;
1719 
1720 discard_and_relse:
1721         sock_put(sk);
1722         goto discard_it;
1723 
1724 do_time_wait:
1725         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1726                 inet_twsk_put(inet_twsk(sk));
1727                 goto discard_it;
1728         }
1729 
1730         if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1731                 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1732                 inet_twsk_put(inet_twsk(sk));
1733                 goto discard_it;
1734         }
1735         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1736         case TCP_TW_SYN: {
1737                 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1738                                                         &tcp_hashinfo,
1739                                                         iph->daddr, th->dest,
1740                                                         inet_iif(skb));
1741                 if (sk2) {
1742                         inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1743                         inet_twsk_put(inet_twsk(sk));
1744                         sk = sk2;
1745                         goto process;
1746                 }
1747                 /* Fall through to ACK */
1748         }
1749         case TCP_TW_ACK:
1750                 tcp_v4_timewait_ack(sk, skb);
1751                 break;
1752         case TCP_TW_RST:
1753                 goto no_tcp_socket;
1754         case TCP_TW_SUCCESS:;
1755         }
1756         goto discard_it;
1757 }
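The key branch is the one annotated above: when no process currently owns the socket, tcp_v4_rcv() first offers the segment to tcp_prequeue(); only if that declines does it call tcp_v4_do_rcv() straight away, and when a process does own the socket the segment goes onto the backlog instead. For reference, tcp_prequeue() in kernels of this generation looks roughly like the sketch below. It is simplified from memory: the delayed-ACK timer tweak and the statistics counters are trimmed, so treat it as an approximation rather than the exact source.

/* Simplified sketch of tcp_prequeue(). Returns 0 when the caller should
 * process the segment itself. */
static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_sock *tp = tcp_sk(sk);

        /* No reader waiting in tcp_recvmsg(), or low-latency mode:
         * decline, and let tcp_v4_rcv() call tcp_v4_do_rcv() directly. */
        if (sysctl_tcp_low_latency || !tp->ucopy.task)
                return 0;

        __skb_queue_tail(&tp->ucopy.prequeue, skb);
        tp->ucopy.memory += skb->truesize;

        if (tp->ucopy.memory > sk->sk_rcvbuf) {
                /* The prequeue grew too large: flush it through the normal
                 * receive path (tcp_v4_do_rcv) right here in the softirq. */
                struct sk_buff *skb1;

                while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
                        sk_backlog_rcv(sk, skb1);
                tp->ucopy.memory = 0;
        } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
                /* First segment parked: wake the reader sleeping in
                 * sk_wait_data() so it can process the prequeue itself. */
                wake_up_interruptible_poll(sk_sleep(sk),
                                           POLLIN | POLLRDNORM | POLLRDBAND);
        }
        return 1;
}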

2. Reading data in process context: tcp_recvmsg()

tcp_recvmsg() is the other half of the picture: it runs in process context with the socket locked, and it is where all three queues ultimately get drained.

1385  *      This routine copies from a sock struct into the user buffer.
1386  *
1387  *      Technical note: in 2.3 we work on _locked_ socket, so that
1388  *      tricks with *seq access order and skb->users are not required.
1389  *      Probably, code can be easily improved even more.
1390  */
1391 
1392 int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1393                 size_t len, int nonblock, int flags, int *addr_len)
1394 {
1395         struct tcp_sock *tp = tcp_sk(sk);
1396         int copied = 0;
1397         u32 peek_seq;
1398         u32 *seq;
1399         unsigned long used;
1400         int err;
1401         int target;             /* Read at least this many bytes */
1402         long timeo;
1403         struct task_struct *user_recv = NULL;
1404         int copied_early = 0;
1405         struct sk_buff *skb;
1406         u32 urg_hole = 0;
1407 
1408         lock_sock(sk);
1409 
1410         TCP_CHECK_TIMER(sk);
1411 
1412         err = -ENOTCONN;
1413         if (sk->sk_state == TCP_LISTEN)
1414                 goto out;
1415 
1416         timeo = sock_rcvtimeo(sk, nonblock);
1417 
1418         /* Urgent data needs to be handled specially. */
1419         if (flags & MSG_OOB)
1420                 goto recv_urg;
1421 
1422         seq = &tp->copied_seq;
1423         if (flags & MSG_PEEK) {
1424                 peek_seq = tp->copied_seq;
1425                 seq = &peek_seq;
1426         }
1427 
1428         target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1429 
1430 #ifdef CONFIG_NET_DMA
1431         tp->ucopy.dma_chan = NULL;
1432         preempt_disable();
1433         skb = skb_peek_tail(&sk->sk_receive_queue);
1434         {
1435                 int available = 0;
1436 
1437                 if (skb)
1438                         available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
1439                 if ((available < target) &&
1440                     (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
1441                     !sysctl_tcp_low_latency &&
1442                     dma_find_channel(DMA_MEMCPY)) {
1443                         preempt_enable_no_resched();
1444                         tp->ucopy.pinned_list =
1445                                         dma_pin_iovec_pages(msg->msg_iov, len);
1446                 } else {
1447                         preempt_enable_no_resched();
1448                 }
1449         }
1450 #endif
1451 
1452         do {
1453                 u32 offset;
1454 
1455                 /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
1456                 if (tp->urg_data && tp->urg_seq == *seq) {
1457                         if (copied)
1458                                 break;
1459                         if (signal_pending(current)) {
1460                                 copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
1461                                 break;
1462                         }
1463                 }
1464 
1465                 /* Next get a buffer. */
1466 
1467                 skb_queue_walk(&sk->sk_receive_queue, skb) {  // walk sk_receive_queue for the next segment to copy out
1468                         /* Now that we have two receive queues this
1469                          * shouldn't happen.
1470                          */
1471                         if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
1472                              KERN_INFO "recvmsg bug: copied %X "
1473                                        "seq %X rcvnxt %X fl %X\n", *seq,
1474                                        TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
1475                                        flags))
1476                                 break;
1477 
1478                         offset = *seq - TCP_SKB_CB(skb)->seq;
1479                         if (tcp_hdr(skb)->syn)
1480                                 offset--;
1481                         if (offset < skb->len)
1482                                 goto found_ok_skb;
1483                         if (tcp_hdr(skb)->fin)
1484                                 goto found_fin_ok;
1485                         WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: "
1486                                         "copied %X seq %X rcvnxt %X fl %X\n",
1487                                         *seq, TCP_SKB_CB(skb)->seq,
1488                                         tp->rcv_nxt, flags);
1489                 }
1490 
1491                 /* Well, if we have backlog, try to process it now yet. */
1492 
1493                 if (copied >= target && !sk->sk_backlog.tail)  
1494                         break;                  // enough data copied and nothing pending in the backlog: stop
1495 
1496                 if (copied) {
1497                         if (sk->sk_err ||
1498                             sk->sk_state == TCP_CLOSE ||
1499                             (sk->sk_shutdown & RCV_SHUTDOWN) ||
1500                             !timeo ||
1501                             signal_pending(current))
1502                                 break;
1503                 } else {
1504                         if (sock_flag(sk, SOCK_DONE))
1505                                 break;
1506 
1507                         if (sk->sk_err) {
1508                                 copied = sock_error(sk);
1509                                 break;
1510                         }
1511 
1512                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1513                                 break;
1514 
1515                         if (sk->sk_state == TCP_CLOSE) {
1516                                 if (!sock_flag(sk, SOCK_DONE)) {
1517                                         /* This occurs when user tries to read
1518                                          * from never connected socket.
1519                                          */
1520                                         copied = -ENOTCONN;
1521                                         break;
1522                                 }
1523                                 break;
1524                         }
1525 
1526                         if (!timeo) {
1527                                 copied = -EAGAIN;
1528                                 break;
1529                         }
1530 
1531                         if (signal_pending(current)) {
1532                                 copied = sock_intr_errno(timeo);
1533                                 break;
1534                         }
1535                 }
1536 
1537                 tcp_cleanup_rbuf(sk, copied);
1538 
1539                 if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
1540                         /* Install new reader */
1541                         if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
1542                                 user_recv = current;
1543                                 tp->ucopy.task = user_recv;
1544                                 tp->ucopy.iov = msg->msg_iov;
1545                         }
1546 
1547                         tp->ucopy.len = len;
1548 
1549                         WARN_ON(tp->copied_seq != tp->rcv_nxt &&
1550                                 !(flags & (MSG_PEEK | MSG_TRUNC)));
1551 
1552                         /* Ugly... If prequeue is not empty, we have to
1553                          * process it before releasing socket, otherwise
1554                          * order will be broken at second iteration.
1555                          * More elegant solution is required!!!
1556                          *
1557                          * Look: we have the following (pseudo)queues:
1558                          *
1559                          * 1. packets in flight
1560                          * 2. backlog
1561                          * 3. prequeue
1562                          * 4. receive_queue
1563                          *
1564                          * Each queue can be processed only if the next ones
1565                          * are empty. At this point we have empty receive_queue.
1566                          * But prequeue _can_ be not empty after 2nd iteration,
1567                          * when we jumped to start of loop because backlog
1568                          * processing added something to receive_queue.
1569                          * We cannot release_sock(), because backlog contains
1570                          * packets arrived _after_ prequeued ones.
1571                          *
1572                          * Shortly, algorithm is clear --- to process all
1573                          * the queues in order. We could make it more directly,
1574                          * requeueing packets from backlog to prequeue, if
1575                          * is not empty. It is more elegant, but eats cycles,
1576                          * unfortunately.
1577                          */
1578                         if (!skb_queue_empty(&tp->ucopy.prequeue))       
1579                                 goto do_prequeue;  // go process the prequeue
1580 
1581                         /* __ Set realtime policy in scheduler __ */
1582                 }
1583 
1584 #ifdef CONFIG_NET_DMA
1585                 if (tp->ucopy.dma_chan)
1586                         dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1587 #endif
1588                 if (copied >= target) {
1589                         /* Do not sleep, just process backlog. */
1590                         release_sock(sk);
1591                         lock_sock(sk);
1592                 } else
1593                         sk_wait_data(sk, &timeo);
1594 
1595 #ifdef CONFIG_NET_DMA
1596                 tcp_service_net_dma(sk, false);  /* Don't block */
1597                 tp->ucopy.wakeup = 0;
1598 #endif
1599 
1600                 if (user_recv) {
1601                         int chunk;
1602 
1603                         /* __ Restore normal policy in scheduler __ */
1604 
1605                         if ((chunk = len - tp->ucopy.len) != 0) {
1606                                 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
1607                                 len -= chunk;
1608                                 copied += chunk;
1609                         }
1610 
1611                         if (tp->rcv_nxt == tp->copied_seq &&
1612                             !skb_queue_empty(&tp->ucopy.prequeue)) {
1613 do_prequeue:
1614                                 tcp_prequeue_process(sk);   // this is where the prequeue gets processed
1615 
1616                                 if ((chunk = len - tp->ucopy.len) != 0) {
1617                                         NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
1618                                         len -= chunk;
1619                                         copied += chunk;
1620                                 }
1621                         }
1622                 }
1623                 if ((flags & MSG_PEEK) &&
1624                     (peek_seq - copied - urg_hole != tp->copied_seq)) {
1625                         if (net_ratelimit())
1626                                 printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
1627                                        current->comm, task_pid_nr(current));
1628                         peek_seq = tp->copied_seq;
1629                 }
1630                 continue;
1631 
1632         found_ok_skb:
1633                 /* Ok so how much can we use? */
1634                 used = skb->len - offset;
1635                 if (len < used)
1636                         used = len;
1637 
1638                 /* Do we have urgent data here? */
1639                 if (tp->urg_data) {
1640                         u32 urg_offset = tp->urg_seq - *seq;
1641                         if (urg_offset < used) {
1642                                 if (!urg_offset) {
1643                                         if (!sock_flag(sk, SOCK_URGINLINE)) {
1644                                                 ++*seq;
1645                                                 urg_hole++;
1646                                                 offset++;
1647                                                 used--;
1648                                                 if (!used)
1649                                                         goto skip_copy;
1650                                         }
1651                                 } else
1652                                         used = urg_offset;
1653                         }
1654                 }
1655 
1656                 if (!(flags & MSG_TRUNC)) {
1657 #ifdef CONFIG_NET_DMA
1658                         if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1659                                 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1660 
1661                         if (tp->ucopy.dma_chan) {
1662                                 tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
1663                                         tp->ucopy.dma_chan, skb, offset,
1664                                         msg->msg_iov, used,
1665                                         tp->ucopy.pinned_list);
1666 
1667                                 if (tp->ucopy.dma_cookie < 0) {
1668 
1669                                         printk(KERN_ALERT "dma_cookie < 0\n");
1670 
1671                                         /* Exception. Bailout! */
1672                                         if (!copied)
1673                                                 copied = -EFAULT;
1674                                         break;
1675                                 }
1676 
1677                                 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1678 
1679                                 if ((offset + used) == skb->len)
1680                                         copied_early = 1;
1681 
1682                         } else
1683 #endif
1684                         {
1685                                 err = skb_copy_datagram_iovec(skb, offset,
1686                                                 msg->msg_iov, used);
1687                                 if (err) {
1688                                         /* Exception. Bailout! */
1689                                         if (!copied)
1690                                                 copied = -EFAULT;
1691                                         break;
1692                                 }
1693                         }
1694                 }
1695 
1696                 *seq += used;
1697                 copied += used;
1698                 len -= used;
1699 
1700                 tcp_rcv_space_adjust(sk);
1701 
1702 skip_copy:
1703                 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
1704                         tp->urg_data = 0;
1705                         tcp_fast_path_check(sk);
1706                 }
1707                 if (used + offset < skb->len)
1708                         continue;
1709 
1710                 if (tcp_hdr(skb)->fin)
1711                         goto found_fin_ok;
1712                 if (!(flags & MSG_PEEK)) {
1713                         sk_eat_skb(sk, skb, copied_early);
1714                         copied_early = 0;
1715                 }
1716                 continue;
1717 
1718         found_fin_ok:
1719                 /* Process the FIN. */
1720                 ++*seq;
1721                 if (!(flags & MSG_PEEK)) {
1722                         sk_eat_skb(sk, skb, copied_early);
1723                         copied_early = 0;
1724                 }
1725                 break;
1726         } while (len > 0);
1727 
1728         if (user_recv) {
1729                 if (!skb_queue_empty(&tp->ucopy.prequeue)) {
1730                         int chunk;
1731 
1732                         tp->ucopy.len = copied > 0 ? len : 0;
1733 
1734                         tcp_prequeue_process(sk);
1735 
1736                         if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
1737                                 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
1738                                 len -= chunk;
1739                                 copied += chunk;
1740                         }
1741                 }
1742 
1743                 tp->ucopy.task = NULL;
1744                 tp->ucopy.len = 0;
1745         }
1746 
1747 #ifdef CONFIG_NET_DMA
1748         tcp_service_net_dma(sk, true);  /* Wait for queue to drain */
1749         tp->ucopy.dma_chan = NULL;
1750 
1751         if (tp->ucopy.pinned_list) {
1752                 dma_unpin_iovec_pages(tp->ucopy.pinned_list);
1753                 tp->ucopy.pinned_list = NULL;
1754         }
1755 #endif
1756 
1757         /* According to UNIX98, msg_name/msg_namelen are ignored
1758          * on connected socket. I was just happy when found this 8) --ANK
1759          */
1760 
1761         /* Clean up data we have read: This will do ACK frames. */
1762         tcp_cleanup_rbuf(sk, copied);
1763 
1764         TCP_CHECK_TIMER(sk);
1765         release_sock(sk);
1766         return copied;
1767 
1768 out:
1769         TCP_CHECK_TIMER(sk);
1770         release_sock(sk);           // the backlog is processed inside release_sock(); see below
1771         return err;
1772 
1773 recv_urg:
1774         err = tcp_recv_urg(sk, msg, len, flags);
1775         goto out;
1776 }
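The long comment inside the loop ("we have the following (pseudo)queues ... Each queue can be processed only if the next ones are empty") is really the heart of the whole scheme. Restating the main loop as pseudo-code makes the ordering easier to see; the helper name copy_from_receive_queue() below is made up purely for illustration, everything else mirrors the code above.

/* Pseudo-code: the shape of tcp_recvmsg(), not real kernel source. */
lock_sock(sk);                                /* from now on, softirq traffic lands in the backlog      */
do {
        copy_from_receive_queue(sk);          /* 1. in-order data already on sk_receive_queue           */

        if (copied >= target && !sk->sk_backlog.tail)
                break;                        /* enough data and no backlog pending                     */

        tp->ucopy.task = current;             /* install ourselves so the softirq can use the prequeue  */
        if (!skb_queue_empty(&tp->ucopy.prequeue))
                tcp_prequeue_process(sk);     /* 2. replay prequeued segments through tcp_v4_do_rcv()   */

        if (copied >= target) {
                release_sock(sk);             /* 3. drain the backlog via __release_sock()              */
                lock_sock(sk);
        } else {
                sk_wait_data(sk, &timeo);     /* sleep; tcp_prequeue() will wake us up                  */
        }
} while (len > 0);
release_sock(sk);                             /* final backlog drain, and send any pending ACKs         */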

 

Let's first trace the tcp_prequeue_process() function:

1240 static void tcp_prequeue_process(struct sock *sk)
1241 {
1242         struct sk_buff *skb;
1243         struct tcp_sock *tp = tcp_sk(sk);
1244 
1245         NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
1246 
1247         /* RX process wants to run with disabled BHs, though it is not
1248          * necessary */
1249         local_bh_disable();
1250         while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1251                 sk_backlog_rcv(sk, skb);
1252         local_bh_enable();
1253 
1254         /* Clear memory counter. */
1255         tp->ucopy.memory = 0;
1256 }
618 static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
619 {
620         return sk->sk_backlog_rcv(sk, skb); // for TCP this callback is tcp_v4_do_rcv()
621 }
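sk_backlog_rcv() just dispatches through the sk_backlog_rcv function pointer, which is filled in from the protocol's struct proto when the socket is set up; for IPv4 TCP that handler is tcp_v4_do_rcv(). From memory the relevant initializer looks like the fragment below (abridged; worth double-checking against net/ipv4/tcp_ipv4.c in your tree):

/* net/ipv4/tcp_ipv4.c (abridged, from memory) */
struct proto tcp_prot = {
        .name           = "TCP",
        .backlog_rcv    = tcp_v4_do_rcv,
        /* many other callbacks omitted */
};

Here is tcp_v4_do_rcv() itself: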
1546 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1547 {
1548         struct sock *rsk;
1549 #ifdef CONFIG_TCP_MD5SIG
1550         /*
1551          * We really want to reject the packet as early as possible
1552          * if:
1553          *  o We're expecting an MD5'd packet and this is no MD5 tcp option
1554          *  o There is an MD5 option and we're not expecting one
1555          */
1556         if (tcp_v4_inbound_md5_hash(sk, skb))
1557                 goto discard;
1558 #endif
1559 
1560         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1561                 TCP_CHECK_TIMER(sk);
1562                 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1563                         rsk = sk;
1564                         goto reset;
1565                 }
1566                 TCP_CHECK_TIMER(sk);
1567                 return 0;
1568         }
1569 
1570         if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1571                 goto csum_err;
1572 
1573         if (sk->sk_state == TCP_LISTEN) {
1574                 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1575                 if (!nsk)
1576                         goto discard;
1577 
1578                 if (nsk != sk) {
1579                         if (tcp_child_process(sk, nsk, skb)) {
1580                                 rsk = nsk;
1581                                 goto reset;
1582                         }
1583                         return 0;
1584                 }
1585         }
1586 
1587         TCP_CHECK_TIMER(sk);
1588         if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1589                 rsk = sk;
1590                 goto reset;
1591         }
1592         TCP_CHECK_TIMER(sk);
1593         return 0;
1594 
1595 reset:
1596         tcp_v4_send_reset(rsk, skb);
1597 discard:
1598         kfree_skb(skb);
1599         /* Be careful here. If this function gets more complicated and
1600          * gcc suffers from register pressure on the x86, sk (in %ebx)
1601          * might be destroyed here. This current version compiles correctly,
1602          * but you have been warned.
1603          */
1604         return 0;
1605 
1606 csum_err:
1607         TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1608         goto discard;
1609 }
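This is also where the prequeue pays off. tcp_prequeue_process() and __release_sock() both replay segments through tcp_v4_do_rcv(), and on an established socket that means tcp_rcv_established(); when the replay happens in process context with tp->ucopy.task set, the header-prediction fast path can copy payload straight into the user's iovec instead of queuing it on sk_receive_queue first. A heavily abridged fragment of that fast path, from memory (the real code in tcp_input.c has more conditions and does more accounting):

/* Abridged sketch of the copy-to-user branch in tcp_rcv_established(). */
if (tp->copied_seq == tp->rcv_nxt &&
    tp->ucopy.task == current &&
    sock_owned_by_user(sk) &&
    len - tcp_header_len <= tp->ucopy.len) {
        if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
                eaten = 1;   /* payload went directly to the user buffer */
}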

Now let's look at the release_sock() function:

1952 void release_sock(struct sock *sk)
1953 {
1954         /*
1955          * The sk_lock has mutex_unlock() semantics:
1956          */
1957         mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1958 
1959         spin_lock_bh(&sk->sk_lock.slock);
1960         if (sk->sk_backlog.tail)
1961                 __release_sock(sk);
1962         sk->sk_lock.owned = 0;
1963         if (waitqueue_active(&sk->sk_lock.wq))
1964                 wake_up(&sk->sk_lock.wq);
1965         spin_unlock_bh(&sk->sk_lock.slock);
1966 }
1523 static void __release_sock(struct sock *sk)
1524 {
1525         struct sk_buff *skb = sk->sk_backlog.head;
1526 
1527         do {
1528                 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1529                 bh_unlock_sock(sk);
1530 
1531                 do {
1532                         struct sk_buff *next = skb->next;
1533 
1534                         skb->next = NULL;
1535                         sk_backlog_rcv(sk, skb);
1536 
1537                         /*
1538                          * We are in process context here with softirqs
1539                          * disabled, use cond_resched_softirq() to preempt.
1540                          * This is safe to do because we've taken the backlog
1541                          * queue private:
1542                          */
1543                         cond_resched_softirq();
1544 
1545                         skb = next;
1546                 } while (skb != NULL);
1547 
1548                 bh_lock_sock(sk);
1549         } while ((skb = sk->sk_backlog.head) != NULL);
1550 
1551         /*
1552          * Doing the zeroing here guarantee we can not loop forever
1553          * while a wild producer attempts to flood us.
1554          */
1555         sk->sk_backlog.len = 0;
1556 }
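3. Putting it together

My understanding of the relationship, then, is this. In the softirq, tcp_v4_rcv() first offers each segment to tcp_prequeue(): if a reader is blocked in tcp_recvmsg() (tp->ucopy.task is set) and tcp_low_latency is off, the segment is parked on the prequeue and the reader is woken up. Otherwise, if no process currently owns the socket, tcp_v4_do_rcv() handles the segment immediately and in-order data ends up on sk_receive_queue; if a process does own the socket, the segment is appended to the backlog. On the process side, tcp_recvmsg() copies data from sk_receive_queue, drains the prequeue with tcp_prequeue_process(), and the backlog is drained by __release_sock() when the reader eventually calls release_sock(). The last two both push segments back through sk_backlog_rcv(), i.e. tcp_v4_do_rcv(), which is what keeps the three queues consistent with one another.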


Original article: http://www.cnblogs.com/mylinuxer/p/4384748.html
