标签:
由于proc文件系统并不物理地存在于任何设备上,它的安装过程是特殊的。对proc文件系统不能直接通过mount()来安装,而要先由系统内核在内核初始化时自动地通过一个函数kern_mount()安装一次,然后再由处理系统初始化的进程通过mount()安装,实际上是"重安装"。
一、在内核初始化时调用init_proc_fs(),代码如下:
/* Filesystem type descriptor for "proc"; FS_SINGLE is stored in its fs_flags. */
static DECLARE_FSTYPE(proc_fs_type, "proc", proc_read_super, FS_SINGLE);

/*
 * Register the "proc" filesystem type and create its kernel-internal mount.
 *
 * Returns 0 on success, otherwise the error from register_filesystem() or
 * the error encoded in the pointer returned by kern_mount().
 */
static int __init init_proc_fs(void)
{
	int err = register_filesystem(&proc_fs_type);

	if (err)
		return err;

	/* Internal mount of one proc instance; the result is kept in proc_mnt. */
	proc_mnt = kern_mount(&proc_fs_type);
	if (IS_ERR(proc_mnt)) {
		/* Undo the registration so a type without a mount is not left listed. */
		unregister_filesystem(&proc_fs_type);
		return PTR_ERR(proc_mnt);
	}
	return 0;
}

/*
 * Expansion of the DECLARE_FSTYPE() helper used above (listed after its use
 * for explanation).  A preprocessor directive has to start its own line, so
 * it must not follow the closing brace of the function on the same line.
 */
#define DECLARE_FSTYPE(var,type,read,flags) \
	struct file_system_type var = { \
		name:		type, \
		read_super:	read, \
		fs_flags:	flags, \
		owner:		THIS_MODULE, \
	}
register_filesystem,向系统登记"proc"这么一种文件系统,代码如下:int register_filesystem(struct file_system_type * fs)
{
int res = 0;
struct file_system_type ** p;
if (!fs)
return -EINVAL;
if (fs->next)
return -EBUSY;
write_lock(&file_systems_lock);
p = find_filesystem(fs->name);
if (*p)
res = -EBUSY;
else
*p = fs;//向系统登记"proc"这么一种文件系统
write_unlock(&file_systems_lock);
return res;
}static struct file_system_type **find_filesystem(const char *name)
{
struct file_system_type **p;
for (p=&file_systems; *p; p=&(*p)->next)
if (strcmp((*p)->name,name) == 0)
break;
return p;
}struct vfsmount *kern_mount(struct file_system_type *type)
{
kdev_t dev = get_unnamed_dev();//获得一个设备号
struct super_block *sb;
struct vfsmount *mnt;
if (!dev)
return ERR_PTR(-EMFILE);
sb = read_super(dev, NULL, type, 0, NULL, 0);//先分配一个空白的super_block数据结构,然后通过由具体文件系统的file_system_type数据结构中的函数指针read_super调用具体的函数来读入超级块
if (!sb) {
put_unnamed_dev(dev);
return ERR_PTR(-EINVAL);
}
mnt = add_vfsmnt(NULL, sb->s_root, NULL);
if (!mnt) {
kill_super(sb, 0);
return ERR_PTR(-ENOMEM);
}
type->kern_mnt = mnt;//最后把根节点vfsmount赋值给type->kern_mnt
return mnt;
} read_super,先分配一个空白的super_block数据结构,然后通过由具体文件系统的file_system_type数据结构中的函数指针read_super调用具体的函数来读入超级块。static struct super_block * read_super(kdev_t dev, struct block_device *bdev,
struct file_system_type *type, int flags,
void *data, int silent)
{
struct super_block * s;
s = get_empty_super();
if (!s)
goto out;
s->s_dev = dev;
s->s_bdev = bdev;
s->s_flags = flags;
s->s_dirt = 0;
sema_init(&s->s_vfs_rename_sem,1);
sema_init(&s->s_nfsd_free_path_sem,1);
s->s_type = type;
sema_init(&s->s_dquot.dqio_sem, 1);
sema_init(&s->s_dquot.dqoff_sem, 1);
s->s_dquot.flags = 0;
lock_super(s);
if (!type->read_super(s, data, silent))
goto out_fail;
unlock_super(s);
/* tell bdcache that we are going to keep this one */
if (bdev)
atomic_inc(&bdev->bd_count);
out:
return s;
out_fail:
s->s_dev = 0;
s->s_bdev = 0;
s->s_type = NULL;
unlock_super(s);
return NULL;
} type->read_super对于proc文件系统来说,这个函数为proc_read_super()。代码如下:struct super_block *proc_read_super(struct super_block *s,void *data,
int silent)
{
struct inode * root_inode;
struct task_struct *p;
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = PROC_SUPER_MAGIC;
s->s_op = &proc_sops;
root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);//根据根目录项,得到根节点的inode结构
if (!root_inode)
goto out_no_root;
/*
* Fixup the root inode‘s nlink value
*/
read_lock(&tasklist_lock);
for_each_task(p) if (p->pid) root_inode->i_nlink++;
read_unlock(&tasklist_lock);
s->s_root = d_alloc_root(root_inode);//分配根节点的dentry结构,并把根节点的inode结构和dentry结构相连,并赋值给s->s_root
if (!s->s_root)
goto out_no_root;
parse_options(data, &root_inode->i_uid, &root_inode->i_gid);
return s;
out_no_root:
printk("proc_read_super: get root inode failed\n");
iput(root_inode);
return NULL;
}
读入超级块,实际上是生成超级块,还有super_block结构中的super_operations指针s_op被设置成指向proc_sops,定义如下:static struct super_operations proc_sops = {
read_inode: proc_read_inode,
put_inode: force_delete,
delete_inode: proc_delete_inode,
statfs: proc_statfs,
};
不仅如此,proc文件系统中的目录项结构,即dentry结构,在设备上也没有对应物,而以内存中的proc_dir_entry数据结构来代替,定义如下:
/*
 * In-memory directory entry of the proc filesystem.
 */
struct proc_dir_entry {
	unsigned short low_ino;		/* inode number; proc_register() takes it from make_inode_number() */
	unsigned short namelen;		/* strlen(name), set by create_proc_entry()/proc_mkdir() */
	const char *name;		/* entry name; proc_lookup() compares it with the dentry name */
	mode_t mode;			/* copied to inode->i_mode by proc_get_inode() when non-zero */
	nlink_t nlink;			/* copied to inode->i_nlink by proc_get_inode() when non-zero */
	uid_t uid;			/* copied to inode->i_uid together with mode */
	gid_t gid;			/* copied to inode->i_gid together with mode */
	unsigned long size;		/* copied to inode->i_size by proc_get_inode() when non-zero */
	struct inode_operations * proc_iops;	/* installed as inode->i_op when set */
	struct file_operations * proc_fops;	/* installed as inode->i_fop when set */
	get_info_t *get_info;		/* proc_file_read() prefers this over read_proc */
	struct module *owner;		/* use count is raised in proc_get_inode() when set */
	struct proc_dir_entry *next, *parent, *subdir;	/* sibling list, parent, head of children */
	void *data;			/* handed to read_proc as its last argument */
	read_proc_t *read_proc;		/* called by proc_file_read() to fill the page */
	write_proc_t *write_proc;	/* presumably the write-side counterpart - TODO confirm */
	atomic_t count;			/* use count */
	int deleted;			/* delete flag */
	kdev_t rdev;			/* passed to init_special_inode() for block/char/fifo entries */
};
最重要的就是/proc节点的proc_dir_entry结构(目录项)proc_root,定义如下:
/* Directory entry of the /proc root; its parent pointer refers to itself. */
struct proc_dir_entry proc_root = {
	low_ino:	PROC_ROOT_INO,
	namelen:	5,
	name:		"/proc",
	mode:		S_IFDIR | S_IRUGO | S_IXUGO,
	nlink:		2,
	proc_iops:	&proc_root_inode_operations,
	proc_fops:	&proc_root_operations,
	parent:		&proc_root,
};
struct inode * proc_get_inode(struct super_block * sb, int ino,
struct proc_dir_entry * de)
{
struct inode * inode;
/*
* Increment the use count so the dir entry can‘t disappear.
*/
de_get(de);
#if 1
/* shouldn‘t ever happen */
if (de && de->deleted)
printk("proc_iget: using deleted entry %s, count=%d\n", de->name, atomic_read(&de->count));
#endif
inode = iget(sb, ino);
if (!inode)
goto out_fail;
inode->u.generic_ip = (void *) de;//根目录项结构放到了这里
if (de) {//根据根目录项结构,填充根节点的inode结构
if (de->mode) {
inode->i_mode = de->mode;
inode->i_uid = de->uid;
inode->i_gid = de->gid;
}
if (de->size)
inode->i_size = de->size;
if (de->nlink)
inode->i_nlink = de->nlink;
if (de->owner)
__MOD_INC_USE_COUNT(de->owner);
if (S_ISBLK(de->mode)||S_ISCHR(de->mode)||S_ISFIFO(de->mode))
init_special_inode(inode,de->mode,kdev_t_to_nr(de->rdev));
else {
if (de->proc_iops)
inode->i_op = de->proc_iops;//proc_root_inode_operations
if (de->proc_fops)
inode->i_fop = de->proc_fops;//proc_root_operations
}
}
out:
return inode;
out_fail:
de_put(de);
goto out;
} 返回到proc_read_super,开始执行d_alloc_root,分配根节点的dentry结构,并把根节点的inode结构和dentry结构相连。struct dentry * d_alloc_root(struct inode * root_inode)
{
struct dentry *res = NULL;
if (root_inode) {
res = d_alloc(NULL, &(const struct qstr) { "/", 1, 0 });
if (res) {
res->d_sb = root_inode->i_sb;
res->d_parent = res;//已经是根节点的dentry结构了,没有上一级了
d_instantiate(res, root_inode);//把根节点的inode结构和dentry结构相连
}
}
return res;
}static struct vfsmount *add_vfsmnt(struct nameidata *nd,
struct dentry *root,
const char *dev_name)
{
struct vfsmount *mnt;
struct super_block *sb = root->d_inode->i_sb;
char *name;
mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
if (!mnt)
goto out;
memset(mnt, 0, sizeof(struct vfsmount));
if (nd || dev_name)
mnt->mnt_flags = MNT_VISIBLE;
/* It may be NULL, but who cares? */
if (dev_name) {
name = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
if (name) {
strcpy(name, dev_name);
mnt->mnt_devname = name;
}
}
mnt->mnt_owner = current->uid;
atomic_set(&mnt->mnt_count,1);
mnt->mnt_sb = sb;//重点
spin_lock(&dcache_lock);
if (nd && !IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
goto fail;
mnt->mnt_root = dget(root);//重点
mnt->mnt_mountpoint = nd ? dget(nd->dentry) : dget(root);//本身就是挂载节点dentry结构
mnt->mnt_parent = nd ? mntget(nd->mnt) : mnt;//本身就是挂载节点vfsmount结构
if (nd) {
list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
list_add(&mnt->mnt_clash, &nd->dentry->d_vfsmnt);
} else {
INIT_LIST_HEAD(&mnt->mnt_child);
INIT_LIST_HEAD(&mnt->mnt_clash);
}
INIT_LIST_HEAD(&mnt->mnt_mounts);
list_add(&mnt->mnt_instances, &sb->s_mounts);
list_add(&mnt->mnt_list, vfsmntlist.prev);
spin_unlock(&dcache_lock);
out:
return mnt;
fail:
spin_unlock(&dcache_lock);
if (mnt->mnt_devname)
kfree(mnt->mnt_devname);
kfree(mnt);
return NULL;
}二、光是kern_mount()还不够,还得由系统的初始化进程从内核外部通过系统调用mount()再安装一次。通常,这个命令行为是:mount -nvt proc /dev/null proc
前面我们提到过,proc文件系统的file_system_type数据结构中的FS_SINGLE标志位为1,它起着重要的作用。为什么重要呢?因为它使sys_mount()的主体do_mount()通过get_sb_single(),而不是get_sb_bdev(),来取得所安装文件系统的super_block数据结构。相关代码如下:
if (fstype->fs_flags & FS_NOMOUNT) sb = ERR_PTR(-EINVAL); else if (fstype->fs_flags & FS_REQUIRES_DEV) sb = get_sb_bdev(fstype, dev_name, flags, data_page); else if (fstype->fs_flags & FS_SINGLE) sb = get_sb_single(fstype, flags, data_page); else sb = get_sb_nodev(fstype, flags, data_page);
/*
 * Return the super_block of the kernel-wide instance of an FS_SINGLE
 * filesystem (the one reachable through fs_type->kern_mnt) instead of
 * building a new one, after re-applying the mount flags/data to it.
 *
 * NOTE(review): mount_sem is taken here and not released in this function;
 * presumably the caller releases it - confirm against do_mount().
 */
static struct super_block *get_sb_single(struct file_system_type *fs_type,
int flags, void *data)
{
struct super_block * sb;
/*
* Get the superblock of kernel-wide instance, but
* keep the reference to fs_type.
*/
down(&mount_sem);
sb = fs_type->kern_mnt->mnt_sb;
/* An internal mount without a super_block is treated as a kernel bug. */
if (!sb)
BUG();
get_filesystem(fs_type);
do_remount_sb(sb, flags, data);
return sb;
} 取得了proc文件系统的super_block结构以后,回到do_mount()代码中,以后的操作就与普通文件系统的安装无异了。这样就将proc文件系统安装到了节点/proc上。三、刚才我们看到了/proc节点的proc_dir_entry结构proc_root,现在我们创建/proc节点以下的子节点的proc_dir_entry结构,这是由内核在初始化时调用proc_root_init()完成的,代码如下:
/*
 * Populate the top level of the proc tree: the simple files created by
 * proc_misc_init(), then the fixed sub-directories.  A NULL parent makes
 * proc_mkdir() resolve the parent from the name, which for a name without
 * '/' is proc_root.
 */
void __init proc_root_init(void)
{
	proc_misc_init();

	proc_net = proc_mkdir("net", NULL);
#ifdef CONFIG_SYSVIPC
	proc_mkdir("sysvipc", NULL);
#endif
#ifdef CONFIG_SYSCTL
	proc_sys_root = proc_mkdir("sys", NULL);
#endif
	proc_root_fs = proc_mkdir("fs", NULL);
	proc_root_driver = proc_mkdir("driver", NULL);
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
	/* just give it a mountpoint */
	proc_mkdir("openprom", NULL);
#endif

	proc_tty_init();
#ifdef CONFIG_PROC_DEVICETREE
	proc_device_tree_init();
#endif
	proc_bus = proc_mkdir("bus", NULL);
}
proc_misc_init,主要创建/proc节点以下的子节点的proc_dir_entry结构,而且子节点大多是文件,不是目录。
void __init proc_misc_init(void)
{
struct proc_dir_entry *entry;
static struct {
char *name;
int (*read_proc)(char*,char**,off_t,int,int*,void*);
} *p, simple_ones[] = {
{"loadavg", loadavg_read_proc},
{"uptime", uptime_read_proc},
{"meminfo", meminfo_read_proc},
{"version", version_read_proc},
{"cpuinfo", cpuinfo_read_proc},
#ifdef CONFIG_PROC_HARDWARE
{"hardware", hardware_read_proc},
#endif
#ifdef CONFIG_STRAM_PROC
{"stram", stram_read_proc},
#endif
#ifdef CONFIG_DEBUG_MALLOC
{"malloc", malloc_read_proc},
#endif
#ifdef CONFIG_MODULES
{"modules", modules_read_proc},
{"ksyms", ksyms_read_proc},
#endif
{"stat", kstat_read_proc},
{"devices", devices_read_proc},
{"partitions", partitions_read_proc},
#if !defined(CONFIG_ARCH_S390)
{"interrupts", interrupts_read_proc},
#endif
{"filesystems", filesystems_read_proc},
{"dma", dma_read_proc},
{"ioports", ioports_read_proc},
{"cmdline", cmdline_read_proc},
#ifdef CONFIG_SGI_DS1286
{"rtc", ds1286_read_proc},
#endif
{"locks", locks_read_proc},
{"mounts", mounts_read_proc},
{"swaps", swaps_read_proc},
{"iomem", memory_read_proc},
{"execdomains", execdomains_read_proc},
{NULL,}
};
for (p = simple_ones; p->name; p++)
create_proc_read_entry(p->name, 0, NULL, p->read_proc, NULL);
/* And now for trickier ones */
entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
if (entry)
entry->proc_fops = &proc_kmsg_operations;
proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL);
if (proc_root_kcore) {
proc_root_kcore->proc_fops = &proc_kcore_operations;
proc_root_kcore->size =
(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
}
if (prof_shift) {
entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL);
if (entry) {
entry->proc_fops = &proc_profile_operations;
entry->size = (1+prof_len) * sizeof(unsigned int);
}
}
#ifdef __powerpc__
{
extern struct file_operations ppc_htab_operations;
entry = create_proc_entry("ppc_htab", S_IRUGO|S_IWUSR, NULL);
if (entry)
entry->proc_fops = &ppc_htab_operations;
}
#endif
entry = create_proc_read_entry("slabinfo", S_IWUSR | S_IRUGO, NULL,
slabinfo_read_proc, NULL);
if (entry)
entry->write_proc = slabinfo_write_proc;
}create_proc_read_entry,主要创建/proc节点以下的子节点的proc_dir_entry结构,而且子节点大多是文件,不是目录。
/*
 * Create a proc entry and attach a read_proc callback plus its private data.
 *
 * Walk-through example taken from proc_misc_init(): name "loadavg", mode 0,
 * base NULL, read_proc loadavg_read_proc, data NULL.
 *
 * Returns the new entry, or NULL when create_proc_entry() failed.
 */
extern inline struct proc_dir_entry *create_proc_read_entry(const char *name,
	mode_t mode, struct proc_dir_entry *base,
	read_proc_t *read_proc, void * data)
{
	struct proc_dir_entry *res = create_proc_entry(name, mode, base);

	if (res) {
		res->read_proc = read_proc;
		res->data = data;
	}
	return res;
}

/*
 * Allocate a proc_dir_entry named after the last component of @name and
 * link it under @parent.  With a NULL @parent the parent is resolved from
 * the path in @name by xlate_proc_name().
 *
 * Returns the new entry, or NULL when the parent cannot be resolved, the
 * allocation fails, or the entry cannot be registered.
 */
struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
	struct proc_dir_entry *parent)
{
	struct proc_dir_entry *ent = NULL;
	const char *fn = name;
	int len;

	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
		goto out;
	len = strlen(fn);

	/* One allocation: the structure first, the NUL-terminated name right behind it. */
	ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
	if (!ent)
		goto out;
	memset(ent, 0, sizeof(struct proc_dir_entry));
	memcpy(((char *) ent) + sizeof(*ent), fn, len + 1);
	ent->name = ((char *) ent) + sizeof(*ent);
	ent->namelen = len;

	if (S_ISDIR(mode)) {
		/* No permission bits given: default to r-x for everybody. */
		if ((mode & S_IALLUGO) == 0)
			mode |= S_IRUGO | S_IXUGO;
		ent->proc_fops = &proc_dir_operations;
		ent->proc_iops = &proc_dir_inode_operations;
		ent->nlink = 2;
	} else {
		/* No file type given: default to a regular file. */
		if ((mode & S_IFMT) == 0)
			mode |= S_IFREG;
		/* No permission bits given: default to read-only for everybody. */
		if ((mode & S_IALLUGO) == 0)
			mode |= S_IRUGO;
		ent->nlink = 1;
	}
	ent->mode = mode;

	/*
	 * Link the entry (e.g. "loadavg") under its parent.  proc_register()
	 * fails when no inode number is available; the entry is then not linked
	 * anywhere and has low_ino 0, so release it instead of handing the
	 * caller an unusable entry.
	 */
	if (proc_register(parent, ent) < 0) {
		kfree(ent);
		ent = NULL;
	}
out:
	return ent;
}
xlate_proc_name,parent返回的是父节点的proc_dir_entry结构,fn返回当前的节点名,现在name为loadavg,返回的fn还是loadavg,parent是根节点的proc_dir_entry结构proc_root。
/*
 * Split a proc path into the entry of its parent directory and its final
 * component.
 *
 * Every '/'-terminated component of @name is looked up, starting at
 * proc_root, among the children of the entry found so far.  On success
 * *@ret is the last directory entry walked (proc_root when @name contains
 * no '/') and *@residual points at the final component inside @name.
 *
 * Returns 0 on success, -ENOENT when an intermediate component is missing.
 */
static int xlate_proc_name(const char *name,
	struct proc_dir_entry **ret, const char **residual)
{
	const char *cp = name, *next;
	struct proc_dir_entry *de;
	int len;

	de = &proc_root;
	while (1) {
		next = strchr(cp, '/');	/* NULL for "loadavg": there is no directory part */
		if (!next)
			break;
		len = next - cp;
		for (de = de->subdir; de ; de = de->next) {
			if (proc_match(len, cp, de))
				break;
		}
		if (!de)
			return -ENOENT;
		cp += len + 1;
	}
	*residual = cp;		/* "loadavg" in the example */
	*ret = de;		/* proc_root in the example */
	return 0;
}
proc_register(parent, ent),把loadavg节点的proc_dir_entry结构登记到根节点的proc_dir_entry结构。
/*
 * Give @dp an inode number and push it onto the head of @dir's child list,
 * filling in default operations that depend on the entry type.
 *
 * Returns 0 on success, -EAGAIN when make_inode_number() fails.
 */
static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
{
	int i;

	i = make_inode_number();
	if (i < 0)
		return -EAGAIN;
	dp->low_ino = i;
	dp->next = dir->subdir;
	dp->parent = dir;	/* the child refers to its parent through ->parent */
	dir->subdir = dp;	/* the parent's ->subdir now heads the child list with dp */
	if (S_ISDIR(dp->mode)) {
		if (dp->proc_iops == NULL) {
			dp->proc_fops = &proc_dir_operations;
			dp->proc_iops = &proc_dir_inode_operations;
		}
		dir->nlink++;
	} else if (S_ISLNK(dp->mode)) {
		if (dp->proc_iops == NULL)
			dp->proc_iops = &proc_link_inode_operations;
	} else if (S_ISREG(dp->mode)) {
		/* "loadavg" is a regular file and ends up with proc_file_operations. */
		if (dp->proc_fops == NULL)
			dp->proc_fops = &proc_file_operations;
	}
	return 0;
}
proc_misc_init中的其他类似的代码就不解释了,例如:entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL);
entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL);
entry = create_proc_read_entry("slabinfo", S_IWUSR | S_IRUGO, NULL,
slabinfo_read_proc, NULL);
/*
 * Create a directory entry named after the last component of @name and link
 * it under @parent.  With a NULL @parent the parent is resolved from the
 * path in @name by xlate_proc_name().
 *
 * Returns the new entry, or NULL when the parent cannot be resolved, the
 * allocation fails, or the entry cannot be registered.
 */
struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent)
{
	struct proc_dir_entry *ent = NULL;
	const char *fn = name;
	int len;

	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
		goto out;
	len = strlen(fn);

	/* One allocation: the structure first, the NUL-terminated name right behind it. */
	ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
	if (!ent)
		goto out;
	memset(ent, 0, sizeof(struct proc_dir_entry));
	memcpy(((char *) ent) + sizeof(*ent), fn, len + 1);
	ent->name = ((char *) ent) + sizeof(*ent);
	ent->namelen = len;

	/* Main difference from create_proc_entry(): always directory operations and mode. */
	ent->proc_fops = &proc_dir_operations;
	ent->proc_iops = &proc_dir_inode_operations;
	ent->nlink = 2;
	ent->mode = S_IFDIR | S_IRUGO | S_IXUGO;

	/*
	 * proc_register() fails when no inode number is available; the entry is
	 * then not linked anywhere, so release it rather than return it.
	 */
	if (proc_register(parent, ent) < 0) {
		kfree(ent);
		ent = NULL;
	}
out:
	return ent;
}
和上面的操作区别在于:ent->proc_fops = &proc_dir_operations; ent->proc_iops = &proc_dir_inode_operations;proc_root_init还有其他类似的操作,就不解释了:
proc_mkdir("sysvipc", 0);
proc_sys_root = proc_mkdir("sys", 0);
proc_root_fs = proc_mkdir("fs", 0);
proc_root_driver = proc_mkdir("driver", 0);
proc_mkdir("openprom", 0);
proc_tty_init();
proc_bus = proc_mkdir("bus", 0);我们主要关心proc_tty_init,代码如下:
/*
 * Create the "tty" directory with its "ldisc" and "driver" sub-directories
 * and the "ldiscs" and "drivers" read-only files.  Nothing below "tty" is
 * created when the "tty" directory itself cannot be made.
 */
void __init proc_tty_init(void)
{
	struct proc_dir_entry *tty_dir = proc_mkdir("tty", NULL);

	if (tty_dir == NULL)
		return;

	/* Paths containing '/' are resolved relative to proc_root by proc_mkdir(). */
	proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL);
	proc_tty_driver = proc_mkdir("tty/driver", NULL);

	create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL);
	create_proc_read_entry("tty/drivers", 0, NULL, tty_drivers_read_proc, NULL);
}
proc_mkdir("tty", 0)和上面的步骤一样,proc_mkdir("tty/ldisc", 0)的执行,比较不同,如下:
/*
 * proc_mkdir() again, annotated for the call proc_mkdir("tty/ldisc", 0).
 *
 * Returns the new entry, or NULL when the parent cannot be resolved, the
 * allocation fails, or the entry cannot be registered.
 */
struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent)
{
	struct proc_dir_entry *ent = NULL;
	const char *fn = name;
	int len;

	/*
	 * name is "tty/ldisc": on return parent is the entry of "tty" and fn
	 * points at "ldisc".
	 */
	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
		goto out;
	len = strlen(fn);
	ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
	if (!ent)
		goto out;
	memset(ent, 0, sizeof(struct proc_dir_entry));
	memcpy(((char *) ent) + sizeof(*ent), fn, len + 1);
	ent->name = ((char *) ent) + sizeof(*ent);
	ent->namelen = len;
	ent->proc_fops = &proc_dir_operations;
	ent->proc_iops = &proc_dir_inode_operations;
	ent->nlink = 2;
	ent->mode = S_IFDIR | S_IRUGO | S_IXUGO;

	/*
	 * Link the "ldisc" entry under the "tty" entry.  proc_register() fails
	 * when no inode number is available; release the unlinked entry then.
	 */
	if (proc_register(parent, ent) < 0) {
		kfree(ent);
		ent = NULL;
	}
out:
	return ent;
}

/*
 * xlate_proc_name() again, annotated for name == "tty/ldisc".
 *
 * Returns 0 with *ret set to the parent entry and *residual to the final
 * component, or -ENOENT when an intermediate component is missing.
 */
static int xlate_proc_name(const char *name,
	struct proc_dir_entry **ret, const char **residual)
{
	const char *cp = name, *next;
	struct proc_dir_entry *de;
	int len;

	de = &proc_root;
	while (1) {
		next = strchr(cp, '/');	/* points at the '/' between "tty" and "ldisc" */
		if (!next)
			break;
		len = next - cp;	/* length of "tty"; cp still points at "tty" */
		for (de = de->subdir; de ; de = de->next) {
			/* Scan the children of the root entry until "tty" matches. */
			if (proc_match(len, cp, de))
				break;
		}
		if (!de)
			return -ENOENT;
		cp += len + 1;		/* cp now points at "ldisc" */
	}
	*residual = cp;		/* "ldisc" */
	*ret = de;		/* the entry of "tty" */
	return 0;
}
首先调用open("/proc/loadavg"),具体过程请参考Linux内核源代码情景分析-文件的打开,open_namei里面这部分会有些不同:
if (path_init(pathname, LOOKUP_PARENT, nd))
error = path_walk(pathname, nd);//找到父节点 找到"/proc/loadavg"的父节点,也就是/proc的节点,参考Linux内核源代码情景分析-文件系统安装后的访问,会调用是否是挂载点,while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)),这个函数找到proc节点的dentry结构。然后再调用dentry = lookup_hash(&nd->last, nd->dentry),nd->last就是下一个节点名"loadavg"。这个函数先通过cached_lookup()看看下一个节点的dentry结构是否已经建立在内存中,如果没有就要通过real_lookup()从设备上读入该节点的目录项(以及索引节点)并在内存中为之创建起它的dentry结构。
/*
 * Look up @name in directory @parent when the earlier cached lookup missed.
 *
 * The directory's i_sem is held while the cache is re-checked and, on a
 * miss, while a fresh dentry is allocated and passed to the directory's
 * i_op->lookup method.  Returns the resulting dentry, or an ERR_PTR value
 * (-ENOMEM when d_alloc() fails, -ENOENT when a cached dentry fails
 * revalidation, or whatever the lookup method returned).
 */
static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
{
struct dentry * result;
struct inode *dir = parent->d_inode;
down(&dir->i_sem);
/*
* First re-do the cached lookup just in case it was created
* while we waited for the directory semaphore..
*
* FIXME! This could use version numbering or similar to
* avoid unnecessary cache lookups.
*/
result = d_lookup(parent, name);
if (!result) {
struct dentry * dentry = d_alloc(parent, name);
/* Preset the failure value returned when d_alloc() gave us nothing. */
result = ERR_PTR(-ENOMEM);
if (dentry) {
/* The filesystem's lookup method runs under the big kernel lock. */
lock_kernel();
result = dir->i_op->lookup(dir, dentry);
unlock_kernel();
/*
 * A non-NULL return from lookup is passed back as-is and the dentry
 * allocated here is dropped; a NULL return means our dentry is the result.
 */
if (result)
dput(dentry);
else
result = dentry;
}
up(&dir->i_sem);
return result;
}
/*
* Uhhuh! Nasty case: the cache was re-populated while
* we waited on the semaphore. Need to revalidate.
*/
up(&dir->i_sem);
if (result->d_op && result->d_op->d_revalidate) {
/* Give up with -ENOENT only when both d_revalidate() and d_invalidate() return 0. */
if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
dput(result);
result = ERR_PTR(-ENOENT);
}
}
return result;
} 对于/proc根节点的inode结构中的i_op指针指向proc_root_inode_operations,这是在proc_get_inode中设置的,如下:if (de->proc_iops) inode->i_op = de->proc_iops;//proc_root_inode_operations if (de->proc_fops) inode->i_fop = de->proc_fops;//proc_root_operations
static struct inode_operations proc_root_inode_operations = {
lookup: proc_root_lookup,
}; dir->i_op->lookup执行的代码是proc_root_lookup,代码如下:static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry)
{
if (dir->i_ino == PROC_ROOT_INO) { /* check for safety... */
int nlink = proc_root.nlink;
nlink += nr_threads;
dir->i_nlink = nlink;
}
if (!proc_lookup(dir, dentry))
return NULL;
return proc_pid_lookup(dir, dentry);
}struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry)
{
struct inode *inode;
struct proc_dir_entry * de;
int error;
error = -ENOENT;
inode = NULL;
de = (struct proc_dir_entry *) dir->u.generic_ip;
if (de) {
for (de = de->subdir; de ; de = de->next) {
if (!de || !de->low_ino)
continue;
if (de->namelen != dentry->d_name.len)
continue;
if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {//找到loadavg节点的proc_dir_entry结构
int ino = de->low_ino;
error = -EINVAL;
inode = proc_get_inode(dir->i_sb, ino, de);//根据loadavg节点的proc_dir_entry结构得到loadavg节点的inode结构
break;
}
}
}
if (inode) {
dentry->d_op = &proc_dentry_operations;
d_add(dentry, inode);
return NULL;
}
return ERR_PTR(error);
}struct inode * proc_get_inode(struct super_block * sb, int ino,
struct proc_dir_entry * de)
{
struct inode * inode;
/*
* Increment the use count so the dir entry can‘t disappear.
*/
de_get(de);
#if 1
/* shouldn‘t ever happen */
if (de && de->deleted)
printk("proc_iget: using deleted entry %s, count=%d\n", de->name, atomic_read(&de->count));
#endif
inode = iget(sb, ino);
if (!inode)
goto out_fail;
inode->u.generic_ip = (void *) de;
if (de) {
if (de->mode) {
inode->i_mode = de->mode;
inode->i_uid = de->uid;
inode->i_gid = de->gid;
}
if (de->size)
inode->i_size = de->size;
if (de->nlink)
inode->i_nlink = de->nlink;
if (de->owner)
__MOD_INC_USE_COUNT(de->owner);
if (S_ISBLK(de->mode)||S_ISCHR(de->mode)||S_ISFIFO(de->mode))
init_special_inode(inode,de->mode,kdev_t_to_nr(de->rdev));
else {
if (de->proc_iops)//loadavg节点proc_dir_entry结构这个指针为NULL
inode->i_op = de->proc_iops;
if (de->proc_fops)
inode->i_fop = de->proc_fops;//dp->proc_fops = &proc_file_operations,这是在create_proc_entry设置的
}
}
out:
return inode;
out_fail:
de_put(de);
goto out;
} open("/proc/loadavg"),执行完open_namei,继续执行dentry_open。struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
{
struct file * f;
struct inode *inode;
int error;
error = -ENFILE;
f = get_empty_filp();//分配一个空闲的file数据结构
if (!f)
goto cleanup_dentry;
f->f_flags = flags;
f->f_mode = (flags+1) & O_ACCMODE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = get_write_access(inode);
if (error)
goto cleanup_file;
}
f->f_dentry = dentry;//该节点的dentry结构
f->f_vfsmnt = mnt;//该节点的vfsmount结构
f->f_pos = 0;
f->f_reada = 0;
f->f_op = fops_get(inode->i_fop);//f->f_op被赋值为inode_i_fop,这里为proc_file_operations
if (inode->i_sb)
file_move(f, &inode->i_sb->s_files);//将其从中间队列脱链而挂入该文件所在设备的super_block结构中的file结构队列s_files
if (f->f_op && f->f_op->open) {
error = f->f_op->open(inode,f);
if (error)
goto cleanup_all;
}
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
return f;
cleanup_all:
fops_put(f->f_op);
if (f->f_mode & FMODE_WRITE)
put_write_access(inode);
f->f_dentry = NULL;
f->f_vfsmnt = NULL;
cleanup_file:
put_filp(f);
cleanup_dentry:
dput(dentry);
mntput(mnt);
return ERR_PTR(error);
}asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count)
{
ssize_t ret;
struct file * file;
ret = -EBADF;
file = fget(fd);
if (file) {
if (file->f_mode & FMODE_READ) {
ret = locks_verify_area(FLOCK_VERIFY_READ, file->f_dentry->d_inode,
file, file->f_pos, count);
if (!ret) {
ssize_t (*read)(struct file *, char *, size_t, loff_t *);
ret = -EINVAL;
if (file->f_op && (read = file->f_op->read) != NULL)
ret = read(file, buf, count, &file->f_pos);//proc_file_read
}
}
if (ret > 0)
inode_dir_notify(file->f_dentry->d_parent->d_inode,
DN_ACCESS);
fput(file);
}
return ret;
} 对于,proc文件系统来说,file->fop指向了proc_file_operations结构(见dentry_open里面的说明),代码如下:static struct file_operations proc_file_operations = {
llseek: proc_file_lseek,
read: proc_file_read,
write: proc_file_write,
};static ssize_t
proc_file_read(struct file * file, char * buf, size_t nbytes, loff_t *ppos)
{
struct inode * inode = file->f_dentry->d_inode;
char *page;
ssize_t retval=0;
int eof=0;
ssize_t n, count;
char *start;
struct proc_dir_entry * dp;
dp = (struct proc_dir_entry *) inode->u.generic_ip;//取出loadavg节点的proc_dir_entry结构
if (!(page = (char*) __get_free_page(GFP_KERNEL)))
return -ENOMEM;
while ((nbytes > 0) && !eof)
{
count = MIN(PROC_BLOCK_SIZE, nbytes);
start = NULL;
if (dp->get_info) {
/*
* Handle backwards compatibility with the old net
* routines.
*/
n = dp->get_info(page, &start, *ppos, count);
if (n < count)
eof = 1;
} else if (dp->read_proc) {
n = dp->read_proc(page, &start, *ppos, //loadavg_read_proc
count, &eof, dp->data);//相关信息读到page上
} else
break;
if (!start) {
/*
* For proc files that are less than 4k
*/
start = page + *ppos;
n -= *ppos;
if (n <= 0)
break;
if (n > count)
n = count;
}
if (n == 0)
break; /* End of file */
if (n < 0) {
if (retval == 0)
retval = n;
break;
}
/* This is a hack to allow mangling of file pos independent
* of actual bytes read. Simply place the data at page,
* return the bytes, and set `start‘ to the desired offset
* as an unsigned int. - Paul.Russell@rustcorp.com.au
*/
n -= copy_to_user(buf, start < page ? page : start, n);//相关信息返回给用户
if (n == 0) {
if (retval == 0)
retval = -EFAULT;
break;
}
*ppos += start < page ? (long)start : n; /* Move down the file */
nbytes -= n;
buf += n;
retval += n;
}
free_page((unsigned long) page);
return retval;
}在前面代码中,设置了dp->read_proc,如下:
/*
 * Create a proc entry and attach a read_proc callback plus its private data.
 *
 * Walk-through example: name "loadavg", mode 0, base NULL,
 * read_proc loadavg_read_proc, data NULL.
 *
 * Returns the new entry, or NULL when create_proc_entry() failed.
 */
extern inline struct proc_dir_entry *create_proc_read_entry(const char *name,
	mode_t mode, struct proc_dir_entry *base,
	read_proc_t *read_proc, void * data)
{
	struct proc_dir_entry *entry = create_proc_entry(name, mode, base);

	if (!entry)
		return entry;

	entry->read_proc = read_proc;
	entry->data = data;
	return entry;
}
所以dp->read_proc,执行代码如下:
/*
 * read_proc callback behind /proc/loadavg.
 *
 * Formats the three avenrun[] load averages (1, 5 and 15 minutes according
 * to the accompanying text), then nr_running/nr_threads and last_pid, into
 * @page and lets proc_calc_metrics() work out which part of that text
 * answers the request described by @off and @count.
 */
static int loadavg_read_proc(char *page, char **start, off_t off,
	int count, int *eof, void *data)
{
	int load[3];
	int i;
	int len;

	/* FIXED_1/200 is added to each sample before it is split by LOAD_INT/LOAD_FRAC. */
	for (i = 0; i < 3; i++)
		load[i] = avenrun[i] + (FIXED_1/200);

	len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n",
		LOAD_INT(load[0]), LOAD_FRAC(load[0]),
		LOAD_INT(load[1]), LOAD_FRAC(load[1]),
		LOAD_INT(load[2]), LOAD_FRAC(load[2]),
		nr_running, nr_threads, last_pid);
	return proc_calc_metrics(page, start, off, count, eof, len);
}

/*
 * Given @len bytes of text already formatted at @page, describe the slice
 * that starts at offset @off and is at most @count bytes long.
 *
 * Sets *@eof when the slice reaches the end of the text, points *@start at
 * page + off, and returns the slice length (never negative).
 */
static int proc_calc_metrics(char *page, char **start, off_t off,
	int count, int *eof, int len)
{
	int avail;

	if (len <= off+count)
		*eof = 1;
	*start = page + off;

	avail = len - off;
	if (avail > count)
		avail = count;
	if (avail < 0)
		avail = 0;
	return avail;
}
它的作用就是将数组avenrun[]中积累的在过去1分钟,5分钟以及15分钟内的系统平均CPU负荷等统计信息sprintf()”打印“到缓冲区页面中。这些平均负荷的数值是每隔5秒钟在时钟中断服务程序中进行计算的,统计信息中还包括系统当前处于可运行状态的进程个数nr_running以及系统中进程的总数nr_threads,还有系统中已分配使用的最大进程号last_pid。标签:
原文地址:http://blog.csdn.net/jltxgcy/article/details/44858817