标签:
一、系统调用select,把原来当前进程的单睡眠等待状态变成了现在的多睡眠等待状态。具体请看代码,select在内核中的实现为sys_select,代码如下:
/*
 * sys_select - kernel-side implementation of the select() system call.
 *
 * n:    number of descriptor bits to examine; a negative value fails with
 *       -EINVAL and a value above current->files->max_fdset is clamped.
 * inp, outp, exp: user-space bitmaps of open files to watch for input,
 *       output and exceptional conditions.
 * tvp:  user-space timeval holding the longest time to sleep; when it is
 *       NULL the timeout stays at MAX_SCHEDULE_TIMEOUT.
 *
 * Returns the result of do_select(), or a negative errno: -EINVAL for bad
 * arguments, -ENOMEM if the bitmap buffer cannot be allocated,
 * -ERESTARTNOHAND if nothing was ready and a signal is pending, or the
 * error reported while reading the user-space arguments.
 */
asmlinkage long
sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
{
	fd_set_bits fds;
	char *bits;
	long timeout;
	int ret, size;
	timeout = MAX_SCHEDULE_TIMEOUT;
	if (tvp) {
		time_t sec, usec;
		// Copy the fields of the user-space timeval into kernel variables.
		if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))
		    || (ret = __get_user(sec, &tvp->tv_sec))
		    || (ret = __get_user(usec, &tvp->tv_usec)))
			goto out_nofds;
		ret = -EINVAL;
		if (sec < 0 || usec < 0)
			goto out_nofds;
		// Values of MAX_SELECT_SECONDS or more keep MAX_SCHEDULE_TIMEOUT.
		if ((unsigned long) sec < MAX_SELECT_SECONDS) {
			timeout = ROUND_UP(usec, 1000000/HZ);// convert to a timeout counted in 1/HZ-second units
			timeout += sec * (unsigned long) HZ;
		}
	}
	ret = -EINVAL;
	if (n < 0)
		goto out_nofds;
	if (n > current->files->max_fdset)
		n = current->files->max_fdset;
	/*
	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
	 * since we used fdset we need to allocate memory in units of
	 * long-words.
	 */
	ret = -ENOMEM;
	size = FDS_BYTES(n);
	bits = select_bits_alloc(size);// one small allocation that holds all 6 bitmaps
	if (!bits)
		goto out_nofds;
	fds.in      = (unsigned long *)  bits;// first bitmap
	fds.out     = (unsigned long *) (bits +   size);// second bitmap
	fds.ex      = (unsigned long *) (bits + 2*size);// and so on
	fds.res_in  = (unsigned long *) (bits + 3*size);
	fds.res_out = (unsigned long *) (bits + 4*size);
	fds.res_ex  = (unsigned long *) (bits + 5*size);
	// Copy the three "request" bitmaps from user space into fds.
	if ((ret = get_fd_set(n, inp, fds.in)) ||
	    (ret = get_fd_set(n, outp, fds.out)) ||
	    (ret = get_fd_set(n, exp, fds.ex)))
		goto out;
	zero_fd_set(n, fds.res_in);// clear the three result bitmaps
	zero_fd_set(n, fds.res_out);
	zero_fd_set(n, fds.res_ex);
	ret = do_select(n, &fds, &timeout);// the main body of the operation
	// Report the remaining time unless the STICKY_TIMEOUTS personality is set.
	if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
		time_t sec = 0, usec = 0;
		if (timeout) {
			sec = timeout / HZ;// convert timeout back into sec and usec
			usec = timeout % HZ;
			usec *= (1000000/HZ);
		}
		put_user(sec, &tvp->tv_sec);// write the remaining time back to the user-space timeval
		put_user(usec, &tvp->tv_usec);
	}
	if (ret < 0)
		goto out;
	if (!ret) {
		// Nothing is ready: fail with -ERESTARTNOHAND only if a signal is pending.
		ret = -ERESTARTNOHAND;
		if (signal_pending(current))
			goto out;
		ret = 0;
	}
	set_fd_set(n, inp, fds.res_in);// copy the results back to user space
	set_fd_set(n, outp, fds.res_out);
	set_fd_set(n, exp, fds.res_ex);
out:
	select_bits_free(bits, size);
out_nofds:
	return ret;
}其中fd_set,是关于已打开文件的位图,位图中的每一个位都代表着当前进程的一个已打开文件。
typedef __kernel_fd_set		fd_set;
typedef struct {
	unsigned long fds_bits [__FDSET_LONGS];
} __kernel_fd_set;
    inp,outp,exp都指向fd_set结构,都是关于文件的位图。inp所指的位图表示当前进程在睡眠中要等待来自哪一些已打开文件的输入,也就是要读取输入;返回时则表明哪些已打开文件中已经有了输入,可以读了。类似地,outp表示当前进程在睡眠中要等待对哪一些已打开文件的写操作;返回时则表明对哪一些已打开文件的写操作已可立即进行。至于exp,则用来监视在哪一些通道中发生了异常。参数n表示位图中需要检查的位数,也就是所关心的最大已打开文件号加1(sys_select中超过max_fdset的部分会被截断)。把"要求"位图从用户空间复制进来的get_fd_set,代码如下:
static inline
/*
 * Fetch one request bitmap for select().
 *
 * nr:     number of descriptor bits; converted to a byte count with
 *         FDS_BYTES().
 * ufdset: user-space bitmap, or NULL when the caller passed no set.
 * fdset:  kernel buffer that receives the bitmap.
 *
 * A NULL ufdset yields an all-zero bitmap. Returns 0 on success, the
 * verify_area() error if the user range is not accessible, or -EFAULT if
 * the copy from user space fails.
 */
int get_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset)
{
	int error;

	nr = FDS_BYTES(nr);

	/* No set supplied: behave as if it were empty. */
	if (!ufdset) {
		memset(fdset, 0, nr);
		return 0;
	}

	error = verify_area(VERIFY_WRITE, ufdset, nr);
	if (error)
		return error;

	if (__copy_from_user(fdset, ufdset, nr))
		return -EFAULT;

	return 0;
}
    最重要的函数,就是do_select,代码如下:int do_select(int n, fd_set_bits *fds, long *timeout)//fds是个指针,指向一个fd_set_bits结构,结构中就是6个工作位图,其中前3个为"要求"位图
{
	poll_table table, *wait;
	int retval, i, off;
	long __timeout = *timeout;
 	read_lock(¤t->files->file_lock);
	retval = max_select_fd(n, fds);//根据这3个位图计算出本次操作所涉及最大的已打开文件号是什么
	read_unlock(¤t->files->file_lock);
	if (retval < 0)
		return retval;
	n = retval;
	poll_initwait(&table);//初始化poll_table结构table
	wait = &table;//刚刚初始化的table的指针赋值给wait
	if (!__timeout)
		wait = NULL;
	retval = 0;
	for (;;) {//第一次循环
		set_current_state(TASK_INTERRUPTIBLE);//设置当前进程为TASK_INTERRUPTIBLE
		for (i = 0 ; i < n; i++) {
			unsigned long bit = BIT(i);
			unsigned long mask;
			struct file *file;
			off = i / __NFDBITS;
			if (!(bit & BITS(fds, off)))//如果三个位图之一中的某一位为1,就对应的已打开文件作一次询问,#define BITS(fds, n) (*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n))
				continue;
			file = fget(i);//获取文件
			mask = POLLNVAL;
			if (file) {
				mask = DEFAULT_POLLMASK;
				if (file->f_op && file->f_op->poll)//这里我们举两个例子,一个是管道文件读端poll是pipe_poll,一个是鼠标文件poll是aux_poll	
					mask = file->f_op->poll(file, wait);
				fput(file);
			}
			if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {//第一次循环,我们假设没有要返回的结果,(就是上文中红字标出的部分的内容)。
				SET(bit, __RES_IN(fds,off));
				retval++;
				wait = NULL;
			}
			if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
				SET(bit, __RES_OUT(fds,off));
				retval++;
				wait = NULL;
			}
			if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
				SET(bit, __RES_EX(fds,off));
				retval++;
				wait = NULL;
			}
		}
		wait = NULL;
		if (retval || !__timeout || signal_pending(current))
			break;
		if(table.error) {
			retval = table.error;
			break;
		}
		__timeout = schedule_timeout(__timeout);//睡眠等待
	}
	current->state = TASK_RUNNING;
	poll_freewait(&table);
	/*
	 * Up-to-date the caller timeout.
	 */
	*timeout = __timeout;
	return retval;
}    poll_initwait,初始化poll_table结构table,代码如下:static inline void poll_initwait(poll_table* pt)
{
	pt->error = 0;
	pt->table = NULL;
}    其中相关的数据结构定义如下:typedef struct poll_table_struct {//分配一个页面大小
	int error;
	struct poll_table_page * table;
} poll_table;
/*
 * One page of poll_table_entry slots. Pages are chained through ->next and
 * ->entry marks the next unused slot in entries[].
 */
struct poll_table_page {
	struct poll_table_page * next;// when a page is used up another one is allocated; the pages form a singly linked list through next
	struct poll_table_entry * entry;// points at the first free poll_table_entry in entries[]
	struct poll_table_entry entries[0];// zero-length array: its real size is decided at run time. A whole page is always allocated, and the array holds as many poll_table_entry structures as fit in that page
};
/*
 * One wait-queue registration made by __pollwait on behalf of the process
 * that called select().
 */
struct poll_table_entry {
	struct file * filp;	/* file being waited on; referenced with get_file() in __pollwait */
	wait_queue_t wait;	/* wait-queue entry initialised for the current process */
	wait_queue_head_t * wait_address;	/* queue head that `wait` was added to */
};    等待单一对象时,一般都把wait_queue_t数据结构建立在堆栈中。可是,有多个等待对象时就不能那样了。另一方面,在有多个等待对象、从而有多个wait_queue_t数据结构时,要有个既灵活,又有效的方法将这些wait_queue_t结构管理起来。上面这些数据结构就是为此而设计的。返回到do_select,file->f_op->poll,这里我们举两个例子,一个是管道文件读端poll是pipe_poll,一个是鼠标文件poll是aux_poll。我们首先看pipe_poll,代码如下:
/*
 * pipe_poll - poll method for a pipe.
 *
 * Registers the caller on the pipe's wait queue through poll_wait() (which
 * does nothing when wait is NULL) and returns the current event mask:
 * POLLIN | POLLRDNORM, replaced by POLLOUT | POLLWRNORM when the pipe is
 * empty, plus POLLHUP when there are no writers and f_version differs from
 * the pipe's writer counter, plus POLLERR when there are no readers.
 */
static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
	unsigned int mask;
	struct inode *inode = filp->f_dentry->d_inode;

	poll_wait(filp, PIPE_WAIT(*inode), wait);

	/* Reading only -- no need for acquiring the semaphore.  */
	mask = POLLIN | POLLRDNORM;
	if (PIPE_EMPTY(*inode))
		mask = POLLOUT | POLLWRNORM;
	if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
		mask |= POLLHUP;
	if (!PIPE_READERS(*inode))
		mask |= POLLERR;
	return mask;
}

/* Wait-queue head of the pipe attached to an inode (a directive must start its own line). */
#define PIPE_WAIT(inode) (&(inode).i_pipe->wait)
/*
 * poll_wait - called by a driver's poll method to queue the caller on
 * wait_address. It is a no-op unless both p and wait_address are non-NULL.
 */
extern inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
	if (p && wait_address)// only runs when both pointers are non-NULL
		__pollwait(filp, wait_address, p);
}void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
	/*
	 * __pollwait - take the next free poll_table_entry of p (allocating a
	 * new page first if needed), bind it to the current process and add
	 * it to wait_address. On allocation failure p->error becomes -ENOMEM,
	 * the process state is set back to TASK_RUNNING and nothing is queued.
	 */
	struct poll_table_page *table = p->table;
	// If there is no page yet, or the most recently allocated page has no free
	// poll_table_entry left, allocate a new page to extend the capacity.
	if (!table || POLL_TABLE_FULL(table)) {
		struct poll_table_page *new_table;
		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
		if (!new_table) {
			p->error = -ENOMEM;
			__set_current_state(TASK_RUNNING);
			return;
		}
		new_table->entry = new_table->entries;
		new_table->next = table;
		p->table = new_table;// the new page becomes the list head; older pages hang off ->next
		table = new_table;// table may still point at the old page, so refresh it for the code below
	}
	/* Add a new entry */
	{
		struct poll_table_entry * entry = table->entry;
		table->entry = entry+1;// claim one poll_table_entry slot
	 	get_file(filp);
	 	entry->filp = filp;// remember the file
		entry->wait_address = wait_address;// for the pipe example: &(inode).i_pipe->wait
		init_waitqueue_entry(&entry->wait, current);// tie entry->wait to the current process
		add_wait_queue(wait_address,&entry->wait);// hang entry->wait on wait_address
	}
}    下面我们看aux_poll,代码如下:
/*
 * aux_poll - poll method of the mouse device.
 *
 * Queues the caller on queue->proc_list via poll_wait() (skipped when wait
 * is NULL) and returns POLLIN | POLLRDNORM if the queue is not empty,
 * otherwise 0.
 */
static unsigned int aux_poll(struct file *file, poll_table * wait)
{
	unsigned int mask = 0;

	poll_wait(file, &queue->proc_list, wait);
	if (!queue_empty())
		mask = POLLIN | POLLRDNORM;
	return mask;
}

/* State kept for the mouse device: buffer positions, waiters and async owners. */
struct qp_queue {
	unsigned long head;
	unsigned long tail;
	wait_queue_head_t proc_list;	/* wait queue used by aux_poll */
	struct fasync_struct *fasync;
	unsigned char buf[QP_BUF_SIZE];
};
static struct qp_queue *queue;
    至此,当前进程current已经被链入了两个等待队列,一个是queue->proc_list,一个是&(inode).i_pipe->wait。我们假设在第一次循环中没有要返回的结果(也就是do_select中设置__RES_IN、__RES_OUT、__RES_EX的三个if分支都没有执行),然后这个进程通过schedule_timeout进入睡眠等待。
下面,我们来看看如何唤醒这个进程呢?
对于第一个管道文件,代码如下:
static ssize_t
pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
{
             ......
             do {
			/*
			 * Synchronous wake-up: it knows that this process
			 * is going to give up this CPU, so it doesnt have
			 * to do idle reschedules.
			 */
			wake_up_interruptible_sync(PIPE_WAIT(*inode));//唤醒了等待着的进程
			PIPE_WAITING_WRITERS(*inode)++;
			pipe_wait(inode);
			PIPE_WAITING_WRITERS(*inode)--;
			if (signal_pending(current))
				goto out;
			if (!PIPE_READERS(*inode))
				goto sigpipe;
		} while (!PIPE_FREE(*inode));
}    对于第二个文件,鼠标文件,代码如下:static inline void handle_mouse_event(unsigned char scancode)
{
 ......
if (head != queue->tail) {
			queue->head = head;
			kill_fasync(&queue->fasync, SIGIO, POLL_IN);
			wake_up_interruptible(&queue->proc_list);//唤醒了等待着的进程
		}
......
}int do_select(int n, fd_set_bits *fds, long *timeout)//fds是个指针,指向一个fd_set_bits结构,结构中就是6个工作位图,其中前3个为"要求"位图
{
	poll_table table, *wait;
	int retval, i, off;
	long __timeout = *timeout;
 	read_lock(¤t->files->file_lock);
	retval = max_select_fd(n, fds);//根据这3个位图计算出本次操作所涉及最大的已打开文件号是什么
	read_unlock(¤t->files->file_lock);
	if (retval < 0)
		return retval;
	n = retval;
	poll_initwait(&table);//初始化poll_table结构table
	wait = &table;//刚刚初始化的table的指针赋值给wait
	if (!__timeout)
		wait = NULL;
	retval = 0;
	for (;;) {//第二次循环
		set_current_state(TASK_INTERRUPTIBLE);//设置当前进程为TASK_INTERRUPTIBLE
		for (i = 0 ; i < n; i++) {
			unsigned long bit = BIT(i);
			unsigned long mask;
			struct file *file;
			off = i / __NFDBITS;
			if (!(bit & BITS(fds, off)))//如果三个位图之一中的某一位为1,就对应的已打开文件作一次询问,#define BITS(fds, n) (*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n))
				continue;
			file = fget(i);//获取文件
			mask = POLLNVAL;
			if (file) {
				mask = DEFAULT_POLLMASK;
				if (file->f_op && file->f_op->poll)//这里我们举两个例子,一个是管道文件读端poll是pipe_poll,一个是鼠标文件poll是aux_poll	
					mask = file->f_op->poll(file, wait);//由于在睡眠之前已经赋值为NULL,第二次循环wait为NULL
				fput(file);
			}
			if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {//第二次循环,根据mask,设置我们要返回的结果;也就是把询问的结果汇集到fds所指的fd_set_bits数据结构中
				SET(bit, __RES_IN(fds,off));//#define __RES_IN(fds, n)	(fds->res_in + n)
				retval++;
				wait = NULL;
			}
			if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {//#define __RES_OUT(fds, n)	(fds->res_out + n)
				SET(bit, __RES_OUT(fds,off));
				retval++;
				wait = NULL;
			}
			if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {//#define __RES_EX(fds, n)	(fds->res_ex + n)
				SET(bit, __RES_EX(fds,off));
				retval++;
				wait = NULL;
			}
		}
		wait = NULL;//注意这里,在睡眠之前已经赋值为NULL,第二次循环wait为NULL
		if (retval || !__timeout || signal_pending(current))//注意这里,retval排在首位,其次是!_timeout,最后才是signal_pending(current)
			break;
		if(table.error) {
			retval = table.error;
			break;
		}
		__timeout = schedule_timeout(__timeout);//睡眠等待
	}
	current->state = TASK_RUNNING;//当前进程状态为TASK_RUNNING
	poll_freewait(&table);//把所有的wait_queue_t结构从各个等待队里中摘下来
	/*
	 * Up-to-date the caller timeout.
	 */
	*timeout = __timeout;
	return retval;
}    如果wait为NULL,那么只返回相关的mask。static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
	unsigned int mask;
	struct inode *inode = filp->f_dentry->d_inode;
	poll_wait(filp, PIPE_WAIT(*inode), wait);//不会执行
	/* Reading only -- no need for acquiring the semaphore.  */
	mask = POLLIN | POLLRDNORM;
	if (PIPE_EMPTY(*inode))
		mask = POLLOUT | POLLWRNORM;
	if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
		mask |= POLLHUP;
	if (!PIPE_READERS(*inode))
		mask |= POLLERR;
	return mask;
}static unsigned int aux_poll(struct file *file, poll_table * wait)//wait同样是poll_table的指针,和上面一样
{
	poll_wait(file, &queue->proc_list, wait);//和上面一样了
	if (!queue_empty())
		return POLLIN | POLLRDNORM;
	return 0;
}extern inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
	if (p && wait_address)//只有两个都不为NULL时才会执行
		__pollwait(filp, wait_address, p);
}    poll_freewait,把所有的wait_queue_t结构从各个等待队里中摘下来,代码如下:void poll_freewait(poll_table* pt)
{
	struct poll_table_page * p = pt->table;
	while (p) {
		struct poll_table_entry * entry;
		struct poll_table_page *old;
		entry = p->entry;
		do {
			entry--;
			remove_wait_queue(entry->wait_address,&entry->wait);
			fput(entry->filp);
		} while (entry > p->entries);//p->entries一直指向第一个poll_table_entry
		old = p;
		p = p->next;//下一个poll_table_page结构
		free_page((unsigned long) old);//释放poll_table_page结构所在的一个页面
	}
}    最后返回到sys_select,把询问的结果返回到用户空间。set_fd_set(n, inp, fds.res_in);//把结果返回到用户空间 set_fd_set(n, outp, fds.res_out); set_fd_set(n, exp, fds.res_ex);最后,inp所指的位图表示当前进程在睡眠中要等待来自哪一些已打开文件的输入,也就是要读取输入;返回时则表明对哪些已打开文件中已经有了输入,可以读了。类似地,outp表示当前进程在睡眠中药等待对哪一些已打开文件的写操作;返回时则表明对哪一些已打开文件的写操作已可立即进行。至于exp,则用来监视在哪一些通道中发生了异常。
需要对设备的异步操作时,一个进程必须作好下列准备:
1、先打开目标设备。
2、设置好对目标设备的SIGIO信号处理程序。
3、通过系统调用fcntl将本进程设置成目标设备(已打开文件)的"主人"。
4、通过系统调用ioctl将目标设备(已打开文件)设置成异步操作模式。
我们直接看第三步:
系统调用fcntl,在内核中的实现是sys_fcntl,它的主体代码do_fcntl如下:
static long do_fcntl(unsigned int fd, unsigned int cmd,
		     unsigned long arg, struct file * filp)//fd为鼠标文件的文件号
{
	long err = -EINVAL;
	switch (cmd) {
                ......
		case F_SETOWN:
			lock_kernel();
			filp->f_owner.pid = arg;
			filp->f_owner.uid = current->uid;
			filp->f_owner.euid = current->euid;
			err = 0;
			if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
				err = sock_fcntl (filp, F_SETOWN, arg);
			unlock_kernel();
			break;
		......
	}
	return err;
}   其中的数据结构如下:struct file {
	struct list_head	f_list;
	struct dentry		*f_dentry;
	struct vfsmount         *f_vfsmnt;
	struct file_operations	*f_op;
	atomic_t		f_count;
	unsigned int 		f_flags;
	mode_t			f_mode;
	loff_t			f_pos;
	unsigned long 		f_reada, f_ramax, f_raend, f_ralen, f_rawin;
	struct fown_struct	f_owner;
	unsigned int		f_uid, f_gid;
	int			f_error;
	unsigned long		f_version;
	/* needed for tty driver, and maybe others */
	void			*private_data;
};struct fown_struct {
	int pid;		/* pid or -pgrp where SIGIO should be sent */
	uid_t uid, euid;	/* uid/euid of process setting the owner */
	int signum;		/* posix.1b rt signal to be delivered on IO */
};asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)//arg可以是1或者0,fd为鼠标文件的文件号
{	,
	struct file * filp;
	unsigned int flag;
	int on, error = -EBADF;
	filp = fget(fd);
	if (!filp)
		goto out;
	error = 0;
	lock_kernel();
	switch (cmd) {
		......
		case FIOASYNC:
			if ((error = get_user(on, (int *)arg)) != 0)
				break;
			flag = on ? FASYNC : 0;
			/* Did FASYNC state change ? */
			if ((flag ^ filp->f_flags) & FASYNC) {
				if (filp->f_op && filp->f_op->fasync)
					error = filp->f_op->fasync(fd, filp, on);//以鼠标文件的file_operations结构pasux_fops为例,其指针fasync指向fasync_aux	
				else error = -ENOTTY;
			}
			if (error != 0)
				break;
			if (on)
				filp->f_flags |= FASYNC;
			else
				filp->f_flags &= ~FASYNC;
			break;
                 ......
	}
	unlock_kernel();
	fput(filp);
out:
	return error;
}
    fasync_aux,代码如下:static int fasync_aux(int fd, struct file *filp, int on)
{
	/*
	 * fasync_aux - ->fasync method of the mouse device. Forwards to
	 * fasync_helper() on queue->fasync and returns its negative error,
	 * or 0 for any non-negative result.
	 */
	int retval;
	retval = fasync_helper(fd, filp, on, &queue->fasync);// create a fasync_struct for the current process and hang it on the target device
	if (retval < 0)
		return retval;
	return 0;
}struct qp_queue {
	unsigned long head;
	unsigned long tail;
	wait_queue_head_t proc_list;
	struct fasync_struct *fasync;	/* list that fasync_aux registers on */
	unsigned char buf[QP_BUF_SIZE];
};
static struct qp_queue *queue;struct fasync_struct {
	int	magic;	/* set to FASYNC_MAGIC by fasync_helper */
	int	fa_fd;	/* descriptor passed to fasync_helper */
	struct	fasync_struct	*fa_next; /* singly linked list */
	struct	file 		*fa_file;	/* file this entry was registered for */
};    fasync_helper,为当前进程创建一个fasync_struct数据结构,并将其挂入目标设备,代码如下:int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	/*
	 * fasync_helper - add or remove the entry for filp on the list *fapp.
	 *
	 * on != 0: if filp already has an entry only its fa_fd is refreshed
	 *          (result 0), otherwise a new entry is pushed at the head of
	 *          the list (result 1).
	 * on == 0: an existing entry for filp is unlinked and freed (result 1).
	 *
	 * Returns 0 or 1 as described above, or -ENOMEM if the new entry
	 * cannot be allocated. The list is modified under fasync_lock.
	 */
	struct fasync_struct *fa, **fp;
	struct fasync_struct *new = NULL;
	int result = 0;
	if (on) {
		new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);// allocate one fasync_struct
		if (!new)
			return -ENOMEM;
	}
	write_lock_irq(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {// walkthrough: the list is assumed empty, so this body does not run
		if (fa->fa_file == filp) {
			if(on) {
				fa->fa_fd = fd;
				kmem_cache_free(fasync_cache, new);
			} else {
				*fp = fa->fa_next;
				kmem_cache_free(fasync_cache, fa);
				result = 1;
			}
			goto out;
		}
	}
	if (on) {
		new->magic = FASYNC_MAGIC;
		new->fa_file = filp;
		new->fa_fd = fd;
		new->fa_next = *fapp;
		*fapp = new;// in the walkthrough this links the entry into queue->fasync
		result = 1;
	}
out:
	write_unlock_irq(&fasync_lock);
	return result;
}    至此,所有的准备工作都做好了,当前进程继续执行自己的程序。这样,当目标设备中状态发生变化时,就会向进程发送一个SIGIO信号。平时这个进程可以干自己的事,当接收到SIGIO信号时就转入前台的信号处理程序中,完成对设备的具体操作。显然,此时的进程就好像受到外部设备中断的CPU一样。
那么,我们看,当目标设备,我们这个例子中是鼠标文件,发生变化时,是什么样的处理流程?
鼠标文件发生变化时,会调用handle_mouse_event,代码如下:
/*
 * handle_mouse_event (excerpt) - the elided code is marked with "......".
 * When the head has moved, the async owners are signalled and the processes
 * sleeping on queue->proc_list are woken up.
 */
static inline void handle_mouse_event(unsigned char scancode)
{
 ......
if (head != queue->tail) {
			queue->head = head;
			// Scan the mouse device's fasync list and send SIGIO to each process concerned.
			// POLL_IN is passed along so the SIGIO handler can tell that input has arrived.
			kill_fasync(&queue->fasync, SIGIO, POLL_IN);
			wake_up_interruptible(&queue->proc_list);// wakes the waiting process
		}
......
}    kill_fasync,扫描鼠标文件的fasync队列,向每个有关的进程发出一个SIGIO信号,并将POLL_IN传给各个进程的SIGIO信号服务程序作为参数,使其知道接收信号的原因是通道中有了输入,代码如下:void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* kill_fasync - run __kill_fasync() on the list *fp while holding fasync_lock for reading. */
	read_lock(&fasync_lock);
	__kill_fasync(*fp, sig, band);
	read_unlock(&fasync_lock);
}void __kill_fasync(struct fasync_struct *fa, int sig, int band)
{
	/*
	 * __kill_fasync - walk the fasync list and call send_sigio() for
	 * every entry whose file has an owner pid set. The walk stops at the
	 * first entry whose magic number is wrong.
	 */
	while (fa) {
		struct fown_struct * fown;
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		fown = &fa->fa_file->f_owner;// locate the fown_struct of the file
		/* Don't send SIGURG to processes which have not set a
		   queued signum: SIGURG has its own default signalling
		   mechanism. */
		if (fown->pid && !(sig == SIGURG && fown->signum == 0))
			send_sigio(fown, fa->fa_fd, band);
		fa = fa->fa_next;// next fasync_struct
	}
}void send_sigio(struct fown_struct *fown, int fd, int band)
{
	/*
	 * send_sigio - deliver the I/O signal to the owner recorded in fown.
	 * If a task with exactly that positive pid is found it alone is
	 * signalled; otherwise every task is checked, matching on pid or, for
	 * a negative owner value, on -pgrp.
	 */
	struct task_struct * p;
	int   pid	= fown->pid;// owner pid stored by F_SETOWN
	
	read_lock(&tasklist_lock);
	if ( (pid > 0) && (p = find_task_by_pid(pid)) ) {// look up the task_struct by pid
		send_sigio_to_task(p, fown, fd, band);
		goto out;
	}
	for_each_task(p) {
		int match = p->pid;
		if (pid < 0)
			match = -p->pgrp;
		if (pid != match)
			continue;
		send_sigio_to_task(p, fown, fd, band);// send the signal to the selected process
	}
out:
	read_unlock(&tasklist_lock);
}static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown, 
			       int fd,
			       int reason)
{
	/*
	 * send_sigio_to_task - signal one task on behalf of the file owner.
	 * Nothing is sent when the owner's euid is non-zero and neither its
	 * euid nor its uid equals the target's suid or uid. With a non-zero
	 * fown->signum that signal is sent with a filled-in siginfo; with
	 * signum 0, or if that fails, plain SIGIO is sent.
	 */
	if ((fown->euid != 0) &&
	    (fown->euid ^ p->suid) && (fown->euid ^ p->uid) &&
	    (fown->uid ^ p->suid) && (fown->uid ^ p->uid))
		return;
	switch (fown->signum) {// walkthrough: signum is 0 at this point, so "case 0" runs
		siginfo_t si;
		default:
			/* Queue a rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not 
			   SI_KERNEL, since kernel signals always get 
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
			si.si_signo = fown->signum;
			si.si_errno = 0;
		        si.si_code  = reason & ~__SI_MASK;
			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
			if ((reason & __SI_MASK) != __SI_POLL)
				BUG();
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = band_table[reason - POLL_IN];
			si.si_fd    = fd;
			if (!send_sig_info(fown->signum, &si, p))
				break;
		/* fall-through: fall back on the old plain SIGIO signal */
		case 0:
			send_sig(SIGIO, p, 1);// send SIGIO to the process
	}
}Linux内核源代码情景分析-系统调用select以及异步输入/输出
标签:
原文地址:http://blog.csdn.net/jltxgcy/article/details/45559965