本文分析基于Linux Kernel 1.2.13
原创作品,转载请标明http://blog.csdn.net/yming0221/article/details/7547826
更多请查看专栏,地址http://blog.csdn.net/column/details/linux-kernel-net.html
作者:闫明
注:标题中的“(上)”、“(下)”表示分析过程基于数据包的传递方向:“(上)”表示从底层向上层分析,“(下)”表示从上层向底层分析。
下面是发送数据的流程:
应用层发送数据包的入口函数是BSD socket层的sock_write()函数,在分析该函数之前,先分析下socket的创建,系统调用sys_socket()对应的BSD socket层函数为sock_socket()
sock_socket()函数
- /*
- * Perform the socket system call. we locate the appropriate
- * family, then create a fresh socket.
- */
-
- static int sock_socket(int family, int type, int protocol)
- {
- int i, fd;
- struct socket *sock;
- struct proto_ops *ops;
-
- /* Locate the correct protocol family. */
- for (i = 0; i < NPROTO; ++i) //查找对应的协议族
- {
- if (pops[i] == NULL) continue;
- if (pops[i]->family == family)
- break;
- }
-
- if (i == NPROTO) //查找未果,返回错误
- {
- return -EINVAL;
- }
-
- ops = pops[i];//指针指向该协议族的原型操作函数集
-
- /*
- * Check that this is a type that we know how to manipulate and
- * the protocol makes sense here. The family can still reject the
- * protocol later.
- */
-
- if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
- type != SOCK_SEQPACKET && type != SOCK_RAW &&
- type != SOCK_PACKET) || protocol < 0)
- return(-EINVAL);
-
- /*
- * Allocate the socket and allow the family to set things up. if
- * the protocol is 0, the family is instructed to select an appropriate
- * default.
- */
-
- if (!(sock = sock_alloc())) //获取一个socket,已经完成了socket部分初始化设置
- {
- printk("NET: sock_socket: no more sockets\n");
- return(-ENOSR); /* Was: EAGAIN, but we are out of
- system resources! */
- }
-
- sock->type = type;
- sock->ops = ops;
- if ((i = sock->ops->create(sock, protocol)) < 0) //调用INET层函数,inet_create()函数,创建inet层的socket,即sock结构
- {
- sock_release(sock);
- return(i);
- }
-
- if ((fd = get_fd(SOCK_INODE(sock))) < 0) //根据sock结构中的inode,分配文件描述符
- {
- sock_release(sock);
- return(-EINVAL);
- }
-
- return(fd);
- }
该函数的大体功能:
1、分配socket结构和sock结构,分别用于BSD socket层和INET层
2、分配inode和file结构,用于文件操作
3、返回文件描述符,供应用程序使用
其中初始化分配一个socket的方法如下:
- /*
- * Allocate a socket.
- */
-
- struct socket *sock_alloc(void)
- {
- struct inode * inode;
- struct socket * sock;
-
- inode = get_empty_inode();//获一个空的文件结点
- if (!inode)
- return NULL;
- //文件结点相应字段赋值
- inode->i_mode = S_IFSOCK;
- inode->i_sock = 1;
- inode->i_uid = current->uid;
- inode->i_gid = current->gid;
-
- sock = &inode->u.socket_i;//给sicket结构指针赋值,可以看到inode和socket一一对应
- sock->state = SS_UNCONNECTED;
- sock->flags = 0;
- sock->ops = NULL;
- sock->data = NULL;
- sock->conn = NULL;
- sock->iconn = NULL;
- sock->next = NULL;
- sock->wait = &inode->i_wait;
- sock->inode = inode; /* "backlink": we could use pointer arithmetic instead */
- sock->fasync_list = NULL;
- sockets_in_use++;
- return sock;
- }
sock_alloc()执行完后,sock_socket()接着通过sock->ops->create()调用INET层的inet_create()函数。
最后由get_fd()函数分配并返回文件描述符:
- /*
- * Obtains the first available file descriptor and sets it up for use.
- */
- //根据文件inode指针创建文件结构,并返回文件操作的操作符,用于应用程序的使用
- static int get_fd(struct inode *inode)
- {
- int fd;
- struct file *file;
-
- /*
- * Find a file descriptor suitable for return to the user.
- */
-
- file = get_empty_filp();
- if (!file)
- return(-1);
-
- for (fd = 0; fd < NR_OPEN; ++fd)
- if (!current->files->fd[fd])
- break;
- if (fd == NR_OPEN)
- {
- file->f_count = 0;
- return(-1);
- }
-
- FD_CLR(fd, ¤t->files->close_on_exec);
- current->files->fd[fd] = file;
- file->f_op = &socket_file_ops;
- file->f_mode = 3;
- file->f_flags = O_RDWR;
- file->f_count = 1;
- file->f_inode = inode;
- if (inode)
- inode->i_count++;
- file->f_pos = 0;
- return(fd);
- }
下面开始正式看发送数据的最顶层函数--sock_write()函数
- /*
- * Write data to a socket. We verify that the user area ubuf..ubuf+size-1 is
- * readable by the user process.
- */
-
- static int sock_write(struct inode *inode, struct file *file, char *ubuf, int size)
- {
- struct socket *sock;
- int err;
-
- if (!(sock = socki_lookup(inode))) //返回inode结构的对应的socket结构
- {
- printk("NET: sock_write: can‘t find socket for inode!\n");
- return(-EBADF);
- }
-
- if (sock->flags & SO_ACCEPTCON)
- return(-EINVAL);
-
- if(size<0)
- return -EINVAL;
- if(size==0)
- return 0;
-
- if ((err=verify_area(VERIFY_READ,ubuf,size))<0)
- return err;
- return(sock->ops->write(sock, ubuf, size,(file->f_flags & O_NONBLOCK)));//调用inet_write()函数
- }
inet_write()函数
/*
 *	INET-layer write operation: a plain write is a send with no flags,
 *	so this forwards to inet_send() with a flags value of 0 and returns
 *	its result unchanged.
 */
static int inet_write(struct socket *sock, char *ubuf, int size, int noblock)
{
	const unsigned no_flags = 0;

	return inet_send(sock, ubuf, size, noblock, no_flags);
}
inet_send()函数
- static int inet_send(struct socket *sock, void *ubuf, int size, int noblock,
- unsigned flags)
- {
- struct sock *sk = (struct sock *) sock->data;//从socket结构中取出sock指针
- if (sk->shutdown & SEND_SHUTDOWN)
- {
- send_sig(SIGPIPE, current, 1);
- return(-EPIPE);
- }
- if(sk->err)
- return inet_error(sk);
- /* We may need to bind the socket. */
- if(inet_autobind(sk)!=0)//自动分配本地端口号,并将sk根据端口号加入sock表中
- return(-EAGAIN);
- return(sk->prot->write(sk, (unsigned char *) ubuf, size, noblock, flags));//调用udp_write()函数
- }
这样系统就会调用传输层(还是以UDP为例)的函数udp_write()来发送数据,这样数据就从应用层到了传输层。下篇分析传输层向网络层的数据传输。