本文分析基于Linux Kernel 1.2.13
原创作品,转载请标明出处http://blog.csdn.net/yming0221/article/details/7532512
更多请看专栏,地址http://blog.csdn.net/column/details/linux-kernel-net.html
作者:闫明
注:标题中的”(上)“,”(下)“表示分析过程基于数据包的传递方向:”(上)“表示分析是从底层向上分析、”(下)“表示分析是从上向下分析。
这里看看数据包从IP层是如何交给传输层来处理的,为了方便,这里传输层以UDP协议为例来分析。
从ip_rcv()函数中可以看到
- /*
- * Pass on the datagram to each protocol that wants it,
- * based on the datagram protocol. We should really
- * check the protocol handler‘s return values here...
- */
- ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
- (ntohs(iph->tot_len) - (iph->ihl * 4)),
- iph->saddr, 0, ipprot);
这里调用指定协议的handler函数,如果是UDP协议,该函数的定义 udp_protocol如下
- static struct inet_protocol udp_protocol = {
- udp_rcv, /* UDP handler */
- NULL, /* Will be UDP fraglist handler */
- udp_err, /* UDP error control */
- &tcp_protocol, /* next */
- IPPROTO_UDP, /* protocol ID */
- 0, /* copy */
- NULL, /* data */
- "UDP" /* name */
- };
先看UDP协议数据报的报头定义如下:比较简单
- struct udphdr {
- unsigned short source;//源端口
- unsigned short dest;//目的端口
- unsigned short len;//数据包长度
- unsigned short check;//检验和
- };
下面就分析下udp_rcv()函数,流程图:
- /*
- * All we need to do is get the socket, and then do a checksum.
- */
-
- int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
- unsigned long daddr, unsigned short len,
- unsigned long saddr, int redo, struct inet_protocol *protocol)
- {
- struct sock *sk;
- struct udphdr *uh;
- unsigned short ulen;
- int addr_type = IS_MYADDR;
-
- if(!dev || dev->pa_addr!=daddr)//检查这个数据包是不是发送给本地的数据包
- addr_type=ip_chk_addr(daddr);//该函数定义在devinet.c中,用于检查ip地址是否是本地或多播、广播地址
-
- /*
- * Get the header.
- */
- uh = (struct udphdr *) skb->h.uh;//获得UDP数据报的报头
-
- ip_statistics.IpInDelivers++;
-
- /*
- * Validate the packet and the UDP length.
- */
-
- ulen = ntohs(uh->len);
- //参数len表示ip负载长度(IP数据报的数据部分长度)= UDP数据包头+UDP数据包的数据部分+填充部分长度
- //ulen表示的是UDP数据报首部和负载部分的长度,所以正常情况下len>=ulen
- if (ulen > len || len < sizeof(*uh) || ulen < sizeof(*uh))
- {
- printk("UDP: short packet: %d/%d\n", ulen, len);
- udp_statistics.UdpInErrors++;
- kfree_skb(skb, FREE_WRITE);
- return(0);
- }
-
- if (uh->check && udp_check(uh, len, saddr, daddr)) //进行UDP数据包校验
- {
- /* <mea@utu.fi> wants to know, who sent it, to
- go and stomp on the garbage sender... */
- printk("UDP: bad checksum. From %08lX:%d to %08lX:%d ulen %d\n",
- ntohl(saddr),ntohs(uh->source),
- ntohl(daddr),ntohs(uh->dest),
- ulen);
- udp_statistics.UdpInErrors++;
- kfree_skb(skb, FREE_WRITE);
- return(0);
- }
-
-
- len=ulen;//对len赋值为实际的UDP数据报长度
-
- #ifdef CONFIG_IP_MULTICAST//对多播情况进行处理
- if (addr_type!=IS_MYADDR)
- {
- /*
- * Multicasts and broadcasts go to each listener.
- */
- struct sock *sknext=NULL;//next指针
-
- /*get_sock_mcast 获取在对应端口的多播套接字队列
- *下面函数的参数依次表示:sock结构指针,本地端口,远端地址,远端端口,本地地址
- */
-
- sk=get_sock_mcast(udp_prot.sock_array[ntohs(uh->dest)&(SOCK_ARRAY_SIZE-1)], uh->dest,
- saddr, uh->source, daddr);
- if(sk)
- {
- do
- {
- struct sk_buff *skb1;
-
- sknext=get_sock_mcast(sk->next, uh->dest, saddr, uh->source, daddr);//下一个满足条件的套接字
- if(sknext)
- skb1=skb_clone(skb,GFP_ATOMIC);
- else
- skb1=skb;
- if(skb1)
- udp_deliver(sk, uh, skb1, dev,saddr,daddr,len);//对满足条件的套接字调用发送函数发送
- sk=sknext;
- }
- while(sknext!=NULL);
- }
- else
- kfree_skb(skb, FREE_READ);
- return 0;
- }
- #endif
- sk = get_sock(&udp_prot, uh->dest, saddr, uh->source, daddr);
- if (sk == NULL) //没有找到本地对应的套接字,则进行出错处理
- {
- udp_statistics.UdpNoPorts++;
- if (addr_type == IS_MYADDR)
- {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);//回复ICMP出错报文,目的主机不可达
- }
- /*
- * Hmm. We got an UDP broadcast to a port to which we
- * don‘t wanna listen. Ignore it.
- */
- skb->sk = NULL;
- kfree_skb(skb, FREE_WRITE);
- return(0);
- }
-
- return udp_deliver(sk,uh,skb,dev, saddr, daddr, len);//调用函数发送套接字
- }
上面函数中调用了get_sock_mcast()函数,下面具体分析一下该函数的功能,该函数定义的位置在文件af_inet.c中
- /*
- * Deliver a datagram to broadcast/multicast sockets.
- */
-
- struct sock *get_sock_mcast(struct sock *sk, //套接字指针
- unsigned short num,//本地端口
- unsigned long raddr,//远端地址
- unsigned short rnum,//远端端口
- unsigned long laddr)//本地地址
- {
- struct sock *s;
- unsigned short hnum;
-
- hnum = ntohs(num);
-
- /*
- * SOCK_ARRAY_SIZE must be a power of two. This will work better
- * than a prime unless 3 or more sockets end up using the same
- * array entry. This should not be a problem because most
- * well known sockets don‘t overlap that much, and for
- * the other ones, we can just be careful about picking our
- * socket number when we choose an arbitrary one.
- */
-
- s=sk;
-
- for(; s != NULL; s = s->next)
- {
- if (s->num != hnum) //本地端口不符合,跳过
- continue;
- if(s->dead && (s->state == TCP_CLOSE))//dead=1表示该sock结构已经处于释放状态
- continue;
- if(s->daddr && s->daddr!=raddr)//sock的远端地址不等于条件中的远端地址
- continue;
- if (s->dummy_th.dest != rnum && s->dummy_th.dest != 0)
- continue;
- if(s->saddr && s->saddr!=laddr)//sock的本地地址不等于条件的本地地址
- continue;
- return(s);
- }
- return(NULL);
- }
下面是udp_rcv调用的udp_deliver()函数
- static int udp_deliver(struct sock *sk,//sock结构指针
- struct udphdr *uh,//UDP头指针
- struct sk_buff *skb,//sk_buff
- struct device *dev,//接收的网络设备
- long saddr,//本地地址
- long daddr,//远端地址
- int len)//数据包的长度
- {
- //对skb结构相应字段赋值
- skb->sk = sk;
- skb->dev = dev;
- //skb->len = len;
-
- /*
- * These are supposed to be switched.
- */
-
- skb->daddr = saddr;//设置目的地址为本地地址
- skb->saddr = daddr;//设置源地址为远端地址
-
-
- /*
- * Charge it to the socket, dropping if the queue is full.
- */
-
- skb->len = len - sizeof(*uh);
-
- if (sock_queue_rcv_skb(sk,skb)<0) //调用sock_queu_rcv_skb()函数,将skb挂到sk接构中的接收队列中
- {
- udp_statistics.UdpInErrors++;
- ip_statistics.IpInDiscards++;
- ip_statistics.IpInDelivers--;
- skb->sk = NULL;
- kfree_skb(skb, FREE_WRITE);
- release_sock(sk);
- return(0);
- }
- udp_statistics.UdpInDatagrams++;
- release_sock(sk);
- return(0);
- }
sock_queu_rcv_skb()函数的实现如下:
- /*
- * Queue a received datagram if it will fit. Stream and sequenced protocols
- * can‘t normally use this as they need to fit buffers in and play with them.
- */
-
- int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
- {
- unsigned long flags;
- if(sk->rmem_alloc + skb->mem_len >= sk->rcvbuf)
- return -ENOMEM;
- save_flags(flags);
- cli();
- sk->rmem_alloc+=skb->mem_len;
- skb->sk=sk;
- restore_flags(flags);
- skb_queue_tail(&sk->receive_queue,skb);
- if(!sk->dead)
- sk->data_ready(sk,skb->len);
- return 0;
- }
这里就完成了数据包从网络层到传输层的传输。下面的博文将会分析数据包的从上到下的传输过程。