码迷,mamicode.com
首页 > 系统相关 > 详细

Linux内核源代码情景分析-文件系统的安装

时间:2015-04-02 20:57:29      阅读:225      评论:0      收藏:0      [点我收藏+]

标签:

    执行sudo mount -t ext2 /dev/sdb1 /mnt/sdb,将文件系统挂载到/mnt/sdb上。系统调用mount,映射到内核层执行的是sys_mount。假设/dev/sdb1和/mnt/sdb都位于ext2文件系统中。

/*
 * sys_mount - kernel entry point for the mount system call.
 *
 * dev_name, dir_name, type and data are user-space pointers (in the
 * article's example: "/dev/sdb1", "/mnt/sdb" and "ext2"); flags carries
 * the MS_* mount mode bits.  Each argument is first copied into kernel
 * memory, the copies are handed to do_mount() under the big kernel lock,
 * and finally every copy is released in reverse order of acquisition.
 *
 * Returns the result of do_mount(), or the negative error produced by
 * whichever copy step failed.
 */
asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type,
			  unsigned long flags, void * data)
{
	unsigned long type_pg;
	unsigned long dev_pg;
	unsigned long data_pg;
	char *dir_kname;
	int err;

	/* Filesystem type string -> kernel page. */
	err = copy_mount_options(type, &type_pg);
	if (err < 0)
		return err;

	/* Mount point path -> kernel string. */
	dir_kname = getname(dir_name);
	if (IS_ERR(dir_kname)) {
		err = PTR_ERR(dir_kname);
		goto release_type;
	}

	/* Device name -> kernel page. */
	err = copy_mount_options(dev_name, &dev_pg);
	if (err < 0)
		goto release_dir;

	/* Filesystem-specific option data -> kernel page. */
	err = copy_mount_options(data, &data_pg);
	if (err < 0)
		goto release_dev;

	lock_kernel();
	err = do_mount((char *)dev_pg, dir_kname, (char *)type_pg,
		       flags, (void *)data_pg);
	unlock_kernel();
	free_page(data_pg);

release_dev:
	free_page(dev_pg);
release_dir:
	putname(dir_kname);
release_type:
	free_page(type_pg);
	return err;
}
    其中flags为安装模式:
/* Mount mode bits passed in the flags argument of mount. */
#define MS_RDONLY	 1	/* Mount read-only */
#define MS_NOSUID	 2	/* Ignore suid and sgid bits */
#define MS_NODEV	 4	/* Disallow access to device special files */
#define MS_NOEXEC	 8	/* Disallow program execution */
#define MS_SYNCHRONOUS	16	/* Writes are synced at once */
#define MS_REMOUNT	32	/* Alter flags of a mounted FS */
#define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
#define MS_NOATIME	1024	/* Do not update access times. */
#define MS_NODIRATIME	2048	/* Do not update directory access times */
#define MS_BIND		4096	/* "mount --bind"; do_mount() routes this to do_loopback() */

/*
 * Flags that can be altered by MS_REMOUNT
 */
#define MS_RMT_MASK	(MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|			MS_SYNCHRONOUS|MS_MANDLOCK|MS_NOATIME|MS_NODIRATIME)

/*
 * Magic mount flag number. Has to be or-ed to the flag values.
 * do_mount() strips it when the upper 16 bits equal MS_MGC_VAL.
 */
#define MS_MGC_VAL 0xC0ED0000	/* magic flag number to indicate "new" flags */
#define MS_MGC_MSK 0xffff0000	/* magic flag number mask */

    所有的标志位都在低16位,而高16位则用作"magic_number"。


    do_mount是整个过程的核心,代码如下:

/*
 * do_mount - core of the mount operation.
 *
 * Validates the (already kernel-resident) arguments, dispatches remount
 * and bind requests to their own handlers, and otherwise: looks up the
 * filesystem type, resolves the mount point, obtains the super_block via
 * the get_sb_*() variant selected by fstype->fs_flags, and attaches it
 * to the mount point with add_vfsmnt().
 *
 * Returns 0 on success or a negative errno value.
 */
long do_mount(char * dev_name, char * dir_name, char *type_page, // in the example: dev_name -> "/dev/sdb1", dir_name -> "/mnt/sdb"
		  unsigned long flags, void *data_page)
{
	struct file_system_type * fstype;
	struct nameidata nd;
	struct vfsmount *mnt = NULL;
	struct super_block *sb;
	int retval = 0;

	/* Discard magic */
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

	/* Basic sanity checks */

	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))// dir_name must be non-NULL, non-empty, and NUL-terminated within one page
		return -EINVAL;
	if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))// if dev_name is given, it must be NUL-terminated within one page
		return -EINVAL;

	/* OK, looks good, now let's see what do they want */

	/* just change the flags? - capabilities are checked in do_remount() */
	if (flags & MS_REMOUNT)// remount path; not followed in this walkthrough
		return do_remount(dir_name, flags & ~MS_REMOUNT,
				  (char *) data_page);

	/* "mount --bind"? Equivalent to older "mount -t bind" */
	/* No capabilities? What if users do thousands of these? */
	if (flags & MS_BIND)// bind-mount path; not followed in this walkthrough
		return do_loopback(dev_name, dir_name);

	/* For the rest we need the type */

	if (!type_page || !memchr(type_page, 0, PAGE_SIZE))// type_page must be non-NULL and NUL-terminated within one page
		return -EINVAL;

#if 0	/* Can be deleted again. Introduced in patch-2.3.99-pre6 */
	/* loopback mount? This is special - requires fewer capabilities */
	if (strcmp(type_page, "bind")==0)
		return do_loopback(dev_name, dir_name);
#endif

	/* for the rest we _really_ need capabilities... */
	if (!capable(CAP_SYS_ADMIN))// permission check
		return -EPERM;

	/* ... filesystem driver... */
	fstype = get_fs_type(type_page);// look up the file_system_type structure by type name
	if (!fstype)		
		return -ENODEV;

	/* ... and mountpoint. Do the lookup first to force automounting. */
	if (path_init(dir_name,
		      LOOKUP_FOLLOW|LOOKUP_POSITIVE|LOOKUP_DIRECTORY, &nd))
		retval = path_walk(dir_name, &nd);// resolve the mount point; nd.mnt and nd.dentry are used below
	if (retval)
		goto fs_out;

	/* get superblock, locks mount_sem on success */
	if (fstype->fs_flags & FS_NOMOUNT)
		sb = ERR_PTR(-EINVAL);
	else if (fstype->fs_flags & FS_REQUIRES_DEV)// ext2 is declared with FS_REQUIRES_DEV, so it takes this branch
		sb = get_sb_bdev(fstype, dev_name, flags, data_page);// read the device's superblock and build the in-memory super_block from it
	else if (fstype->fs_flags & FS_SINGLE)
		sb = get_sb_single(fstype, flags, data_page);
	else
		sb = get_sb_nodev(fstype, flags, data_page);

	retval = PTR_ERR(sb);
	if (IS_ERR(sb))
		goto dput_out;	/* skips up(&mount_sem): the semaphore is only held on success */

	/* Something was mounted here while we slept */
	while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry))// while this dentry is itself a mount point, step down into what is mounted on it
		;

	/* Refuse the same filesystem on the same mount point */
	retval = -EBUSY;
	if (nd.mnt && nd.mnt->mnt_sb == sb
	    	   && nd.mnt->mnt_root == nd.dentry)
		goto fail;

	retval = -ENOENT;
	if (!nd.dentry->d_inode)
		goto fail;
	down(&nd.dentry->d_inode->i_zombie);
	if (!IS_DEADDIR(nd.dentry->d_inode)) {
		retval = -ENOMEM;	/* reported if add_vfsmnt() returns NULL */
		mnt = add_vfsmnt(&nd, sb->s_root, dev_name);// tie the device's super_block to the mount point's dentry
	}
	up(&nd.dentry->d_inode->i_zombie);
	if (!mnt)
		goto fail;
	retval = 0;
unlock_out:
	up(&mount_sem);
dput_out:
	path_release(&nd);
fs_out:
	put_filesystem(fstype);
	return retval;

fail:
	/* Tear the superblock down only if no vfsmount is attached to it. */
	if (list_empty(&sb->s_mounts))
		kill_super(sb, 0);
	goto unlock_out;
}
    

    系统支持的每一种文件系统都有一个file_system_type数据结构,它们都会链入file_systems链表。

/* Describes one filesystem type; entries are chained through ->next. */
struct file_system_type {
	const char *name;	/* type name, compared with strcmp() in find_filesystem() */
	int fs_flags;	/* FS_* flags; do_mount() uses them to pick the get_sb_*() variant */
	struct super_block *(*read_super) (struct super_block *, void *, int);	/* fills in a super_block; invoked from read_super() */
	struct module *owner;	/* passed to try_inc_mod_count() in get_fs_type() */
	struct vfsmount *kern_mnt; /* For kernel mount, if it's FS_SINGLE fs */
	struct file_system_type * next;	/* next entry in the file_systems list */
};
    对于ext2文件系统来说:
/* ext2's file_system_type: named "ext2", superblock reader ext2_read_super, flagged FS_REQUIRES_DEV. */
static DECLARE_FSTYPE_DEV(ext2_fs_type, "ext2", ext2_read_super);
/* Shorthand for DECLARE_FSTYPE with fs_flags fixed to FS_REQUIRES_DEV. */
#define DECLARE_FSTYPE_DEV(var,type,read) 	DECLARE_FSTYPE(var,type,read,FS_REQUIRES_DEV)
/* Defines a struct file_system_type called var, initialising name, read_super, fs_flags and owner (THIS_MODULE). */
#define DECLARE_FSTYPE(var,type,read,flags) struct file_system_type var = { 	name:		type, 	read_super:	read, 	fs_flags:	flags, 	owner:		THIS_MODULE, }
    我们再看看其他文件系统:
/* pipefs carries FS_NOMOUNT, so do_mount() rejects it with -EINVAL. */
static DECLARE_FSTYPE(pipe_fs_type, "pipefs", pipefs_read_super,
	FS_NOMOUNT|FS_SINGLE);
/* ramfs sets none of FS_NOMOUNT/FS_REQUIRES_DEV/FS_SINGLE, so do_mount() would use get_sb_nodev() for it. */
static DECLARE_FSTYPE(ramfs_fs_type, "ramfs", ramfs_read_super, FS_LITTER);
    get_fs_type根据文件系统类型名,找到对应的file_system_type结构,代码如下:
/*
 * get_fs_type - find the file_system_type registered under @name.
 *
 * The lookup runs under file_systems_lock and takes a reference on the
 * owning module via try_inc_mod_count(); an entry whose module cannot
 * be pinned is treated as not found.  If the first lookup yields nothing
 * and request_module(name) reports success (0), the lookup is repeated
 * once.
 *
 * Returns the matching entry, or NULL.
 */
struct file_system_type *get_fs_type(const char *name)
{
	struct file_system_type *found;

	read_lock(&file_systems_lock);
	found = *(find_filesystem(name));
	if (found && !try_inc_mod_count(found->owner))
		found = NULL;
	read_unlock(&file_systems_lock);
	if (found)
		return found;

	/* Not registered (or not pinnable): try loading a module by that name. */
	if (request_module(name) != 0)
		return NULL;

	read_lock(&file_systems_lock);
	found = *(find_filesystem(name));
	if (found && !try_inc_mod_count(found->owner))
		found = NULL;
	read_unlock(&file_systems_lock);
	return found;
}
/*
 * find_filesystem - walk the file_systems list looking for @name.
 *
 * Returns the address of the link that points at the matching entry.
 * When nothing matches, the returned link is the list's terminating
 * NULL pointer, so *result is NULL.
 */
static struct file_system_type **find_filesystem(const char *name)
{
	struct file_system_type **link = &file_systems;

	while (*link && strcmp((*link)->name, name) != 0)
		link = &(*link)->next;
	return link;
}


    返回到do_mount,执行get_sb_bdev,要把待安装设备的"超级块"读进来并根据超级块中的信息在内存中建立起相应的super_block数据结构,代码如下:

static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
	char *dev_name, int flags, void * data)
{
	struct inode *inode;
	struct block_device *bdev;
	struct block_device_operations *bdops;
	struct super_block * sb;
	struct nameidata nd;
	kdev_t dev;
	int error = 0;
	/* What device it is? */
	if (!dev_name || !*dev_name)
		return ERR_PTR(-EINVAL);
	if (path_init(dev_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
		error = path_walk(dev_name, &nd);//找到/mnt/sdb1节点的mnt结构和dentry结构
	if (error)
		return ERR_PTR(error);
	inode = nd.dentry->d_inode;
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))//一定要是设备节点
		goto out;
	error = -EACCES;
	if (IS_NODEV(inode))
		goto out;
	bdev = inode->i_bdev;//得到block_device结构
	bdops = devfs_get_ops ( devfs_get_handle_from_inode (inode) );
	if (bdops) bdev->bd_op = bdops;
	/* Done with lookups, semaphore down */
	down(&mount_sem);
	dev = to_kdev_t(bdev->bd_dev);//设备号
	sb = get_super(dev);//根据设备号在super_blocks中查找super_block结构
	if (sb) {//如果有就返回
		if (fs_type == sb->s_type &&
		    ((flags ^ sb->s_flags) & MS_RDONLY) == 0) {
			path_release(&nd);
			return sb;
		}
	} else {//如果没有
		mode_t mode = FMODE_READ; /* we always need it ;-) */
		if (!(flags & MS_RDONLY))
			mode |= FMODE_WRITE;
		error = blkdev_get(bdev, mode, 0, BDEV_FS);
		if (error)
			goto out;
		check_disk_change(dev);
		error = -EACCES;
		if (!(flags & MS_RDONLY) && is_read_only(dev))
			goto out1;
		error = -EINVAL;
		sb = read_super(dev, bdev, fs_type, flags, data, 0);
		if (sb) {
			get_filesystem(fs_type);
			path_release(&nd);
			return sb;
		}
out1:
		blkdev_put(bdev, BDEV_FS);
	}
out:
	path_release(&nd);
	up(&mount_sem);
	return ERR_PTR(error);
}
    这里的path_walk与《Linux内核源代码情景分析-从路径名到目标节点》一文中的区别在于:目标节点/dev/sdb1是设备节点,读入其inode时会执行下面代码片段中最后的else分支:

/*
 * Excerpt (the enclosing function is not shown): copy the on-disk block
 * array into the in-memory inode, then pick the operation tables from
 * the inode's type.
 */
for (block = 0; block < EXT2_N_BLOCKS; block++)
		inode->u.ext2_i.i_data[block] = raw_inode->i_block[block];

	if (inode->i_ino == EXT2_ACL_IDX_INO ||
	    inode->i_ino == EXT2_ACL_DATA_INO)
		/* Nothing to do */ ;
	else if (S_ISREG(inode->i_mode)) {
		inode->i_op = &ext2_file_inode_operations;
		inode->i_fop = &ext2_file_operations;
		inode->i_mapping->a_ops = &ext2_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &ext2_dir_inode_operations;
		inode->i_fop = &ext2_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		if (!inode->i_blocks)
			inode->i_op = &ext2_fast_symlink_inode_operations;
		else {
			inode->i_op = &page_symlink_inode_operations;
			inode->i_mapping->a_ops = &ext2_aops;
		}
	} else // this is the branch taken for a device node such as /dev/sdb1
		init_special_inode(inode, inode->i_mode,
				   le32_to_cpu(raw_inode->i_block[0]));// for a special file, i_block[0] holds the device number
    还记得在《Linux内核源代码情景分析-从路径名到目标节点》一文中有如下代码:
bh = ext2_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &de);// find the directory entry by name; per the article, this goes through the parent inode's inode->u.ext2_i.i_data
    在上面的片段中:
/* Copy every on-disk i_block[] slot into the in-memory inode's i_data[]. */
for (block = 0; block < EXT2_N_BLOCKS; block++)
		inode->u.ext2_i.i_data[block] = raw_inode->i_block[block];
    总结一下raw_inode->i_block[]的用途:如果是目录节点的inode,i_block[]存储着目录项所在的位置;如果是普通文件节点的inode,i_block[]存储着文件数据所在的位置;而对于设备节点的inode,i_block[0]存储的是设备号(包含了主设备号和次设备号)。
    接下来我们来看init_special_inode。
/*
 * init_special_inode - set up an inode that is not a regular file,
 * directory or symlink.
 *
 * Stores @mode in the inode and installs the default file operations
 * for the special-file kind encoded in it.  For character and block
 * devices @rdev is converted with to_kdev_t() and stored in i_rdev; a
 * block device additionally gets its block_device from bdget().  An
 * unrecognised mode is only logged.
 */
void init_special_inode(struct inode *inode, umode_t mode, int rdev)
{
	inode->i_mode = mode;

	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = to_kdev_t(rdev);
		return;
	}

	if (S_ISBLK(mode)) {
		/* Block device: also attach the block_device for this device number. */
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = to_kdev_t(rdev);
		inode->i_bdev = bdget(rdev);
		return;
	}

	if (S_ISFIFO(mode)) {
		inode->i_fop = &def_fifo_fops;
		return;
	}

	if (S_ISSOCK(mode)) {
		inode->i_fop = &bad_sock_fops;
		return;
	}

	printk(KERN_DEBUG "init_special_inode: bogus imode (%o)\n", mode);
}
/*
 * bdget - return the block_device for device number @dev, creating and
 * hashing a new one if none is found.
 *
 * The hash bucket is searched under bdev_lock.  On a miss the lock is
 * dropped while a new structure is allocated and initialised, so the
 * search is repeated after the lock is re-taken; if an entry has shown
 * up in the meantime, the fresh allocation is destroyed and the
 * existing entry is returned instead.
 *
 * Returns NULL only when the allocation fails.
 */
struct block_device *bdget(dev_t dev)
{
	struct list_head * head = bdev_hashtable + hash(dev);
	struct block_device *bdev, *new_bdev;
	spin_lock(&bdev_lock);
	bdev = bdfind(dev, head);
	spin_unlock(&bdev_lock);
	if (bdev)
		return bdev;
	new_bdev = alloc_bdev();// allocate a new block_device structure (done with bdev_lock released)
	if (!new_bdev)
		return NULL;
	atomic_set(&new_bdev->bd_count,1);
	new_bdev->bd_dev = dev;// device number
	new_bdev->bd_op = NULL;
	spin_lock(&bdev_lock);
	bdev = bdfind(dev, head);// search the hash bucket again now that the lock is held
	if (!bdev) {
		list_add(&new_bdev->bd_hash, head);// still absent: hash the new entry so later lookups find it
		spin_unlock(&bdev_lock);
		return new_bdev;
	}
	spin_unlock(&bdev_lock);
	destroy_bdev(new_bdev);	/* an entry appeared meanwhile: discard ours */
	return bdev;
}

    

    返回到get_sb_bdev,继续执行,如果get_super找不到对应的super_block结构,那么首先执行blkdev_get,设置bdev->bd_op,代码如下:

/*
 * blkdev_get - excerpt only; the article elides most of the body (the
 * "......" line).  The visible part takes bdev->bd_sem and, if the
 * block_device has no operations table yet, installs the one returned
 * by get_blkfops() for the device's major number.
 */
int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
{
	int ret = -ENODEV;
	kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
	down(&bdev->bd_sem);
	if (!bdev->bd_op)
		bdev->bd_op = get_blkfops(MAJOR(rdev));// fill in bd_op from the table entry for this major number
	......
	return ret;
}
/*
 * get_blkfops - return the block_device_operations recorded in
 * blkdevs[] for @major.
 *
 * Returns NULL when @major is 0 or not below MAX_BLKDEV.  With
 * CONFIG_KMOD, an empty slot first triggers a request for the module
 * named "block-major-<major>"; the slot is then read (again) and its
 * value returned, which may still be NULL.
 */
const struct block_device_operations * get_blkfops(unsigned int major)
{
	/* major 0 is used for non-device mounts */
	if (!major || major >= MAX_BLKDEV)
		return NULL;

#ifdef CONFIG_KMOD
	if (!blkdevs[major].bdops) {
		char modname[20];

		sprintf(modname, "block-major-%d", major);
		request_module(modname);
	}
#endif
	return blkdevs[major].bdops;
}

    然后执行关键的read_super,根据设备号把设备的超级块从硬盘中读出来,代码如下:

/*
 * read_super - take a free super_block, initialise it for @dev and let
 * the filesystem type fill it in.
 *
 * The super_block comes from get_empty_super().  Its generic fields are
 * set, then type->read_super() is called with the super_block locked.
 * On success the block_device reference count is raised (when @bdev is
 * non-NULL) and the super_block is returned.  If the filesystem's
 * read_super() fails, s_dev, s_bdev and s_type are cleared again and
 * NULL is returned; NULL is also returned when no super_block could be
 * obtained.
 */
static struct super_block * read_super(kdev_t dev, struct block_device *bdev,
				       struct file_system_type *type, int flags,
				       void *data, int silent)
{
	struct super_block *sb = get_empty_super();

	if (!sb)
		return NULL;

	sb->s_dev = dev;	/* device number */
	sb->s_bdev = bdev;	/* matching block_device */
	sb->s_flags = flags;
	sb->s_dirt = 0;
	sema_init(&sb->s_vfs_rename_sem,1);
	sema_init(&sb->s_nfsd_free_path_sem,1);
	sb->s_type = type;	/* filesystem type */
	sema_init(&sb->s_dquot.dqio_sem, 1);
	sema_init(&sb->s_dquot.dqoff_sem, 1);
	sb->s_dquot.flags = 0;

	lock_super(sb);
	if (!type->read_super(sb, data, silent)) {
		/* The filesystem rejected it: clear the identifying fields again. */
		sb->s_dev = 0;
		sb->s_bdev = NULL;
		sb->s_type = NULL;
		unlock_super(sb);
		return NULL;
	}
	unlock_super(sb);

	/* tell bdcache that we are going to keep this one */
	if (bdev)
		atomic_inc(&bdev->bd_count);
	return sb;
}
     get_empty_super分配super_block结构,并链入super_blocks,以便下次能够查找到,代码如下:

/*
 * get_empty_super - hand out an unused super_block.
 *
 * First scans the super_blocks list for an entry whose s_dev is zero;
 * such an entry is returned if it is not locked, otherwise a warning is
 * printed and the scan goes on.  If nothing reusable is found and the
 * max_super_blocks limit has not been reached, a new zeroed structure is
 * allocated, its list heads and wait queue are initialised, and it is
 * linked into super_blocks.
 *
 * Returns NULL when the limit is reached or the allocation fails.
 */
struct super_block *get_empty_super(void)
{
	struct super_block *sb;

	/* Try to recycle an entry that is already on the list. */
	sb = sb_entry(super_blocks.next);
	while (sb != sb_entry(&super_blocks)) {
		if (!sb->s_dev) {
			if (!sb->s_lock)
				return sb;
			printk("VFS: empty superblock %p locked!\n", sb);
		}
		sb = sb_entry(sb->s_list.next);
	}

	/* Need a new one... */
	if (nr_super_blocks >= max_super_blocks)
		return NULL;

	sb = kmalloc(sizeof(struct super_block),  GFP_USER);
	if (!sb)
		return NULL;

	nr_super_blocks++;
	memset(sb, 0, sizeof(struct super_block));
	INIT_LIST_HEAD(&sb->s_dirty);
	list_add (&sb->s_list, super_blocks.prev);	/* link into super_blocks */
	init_waitqueue_head(&sb->s_wait);
	INIT_LIST_HEAD(&sb->s_files);
	INIT_LIST_HEAD(&sb->s_mounts);
	return sb;
}

    执行完get_sb_bdev,获得了设备的超级块结构,返回到do_mount继续执行add_vfsmnt,代码如下:

/*
 * add_vfsmnt - create a vfsmount that attaches the tree rooted at @root
 * to the mount point described by @nd.
 *
 * @nd:       lookup result for the mount point; may be NULL, in which
 *            case the new mount is its own parent and its mount point is
 *            @root itself
 * @root:     root dentry of the filesystem being mounted
 * @dev_name: device name to record in the vfsmount; may be NULL
 *
 * Returns the new vfsmount, or NULL if the allocation fails or the
 * mount point dentry is a non-root dentry that has been unhashed.
 */
static struct vfsmount *add_vfsmnt(struct nameidata *nd,
				struct dentry *root,
				const char *dev_name)
{
	struct vfsmount *mnt;
	struct super_block *sb = root->d_inode->i_sb;
	char *name;

	mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
	if (!mnt)
		goto out;
	memset(mnt, 0, sizeof(struct vfsmount));

	if (nd || dev_name)
		mnt->mnt_flags = MNT_VISIBLE;

	/* It may be NULL, but who cares? */
	if (dev_name) {
		/* Keep a private copy of the name; failing to get one is tolerated. */
		name = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
		if (name) {
			strcpy(name, dev_name);
			mnt->mnt_devname = name;
		}
	}
	mnt->mnt_owner = current->uid;
	atomic_set(&mnt->mnt_count,1);
	mnt->mnt_sb = sb;

	spin_lock(&dcache_lock);
	/* Give up if the mount point is a non-root dentry that is unhashed. */
	if (nd && !IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
		goto fail;
	mnt->mnt_root = dget(root);
	mnt->mnt_mountpoint = nd ? dget(nd->dentry) : dget(root);// dentry of the mount point
	mnt->mnt_parent = nd ? mntget(nd->mnt) : mnt;	/* without nd the mount is its own parent */

	if (nd) {
		list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
		list_add(&mnt->mnt_clash, &nd->dentry->d_vfsmnt);// presumably what the d_mountpoint(nd.dentry) loop in do_mount() detects
	} else {
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_clash);
	}
	INIT_LIST_HEAD(&mnt->mnt_mounts);
	list_add(&mnt->mnt_instances, &sb->s_mounts);	/* do_mount()'s fail path tests this list with list_empty() */
	list_add(&mnt->mnt_list, vfsmntlist.prev);
	spin_unlock(&dcache_lock);
out:
	return mnt;
fail:
	spin_unlock(&dcache_lock);
	if (mnt->mnt_devname)
		kfree(mnt->mnt_devname);
	kfree(mnt);
	return NULL;
}
    vfsmount结构如下:

/* One mounted instance of a filesystem; created by add_vfsmnt(). */
struct vfsmount
{
	struct dentry *mnt_mountpoint;	/* dentry of mountpoint */
	struct dentry *mnt_root;	/* root of the mounted tree */
	struct vfsmount *mnt_parent;	/* fs we are mounted on */
	struct list_head mnt_instances;	/* other vfsmounts of the same fs */
	struct list_head mnt_clash;	/* those who are mounted on (other */
					/* instances) of the same dentry */
	struct super_block *mnt_sb;	/* pointer to superblock */
	struct list_head mnt_mounts;	/* list of children, anchored here */
	struct list_head mnt_child;	/* and going through their mnt_child */
	atomic_t mnt_count;		/* set to 1 by add_vfsmnt() */
	int mnt_flags;			/* add_vfsmnt() may set MNT_VISIBLE here */
	char *mnt_devname;		/* Name of device e.g. /dev/dsk/hda1 */
	struct list_head mnt_list;	/* link in the global vfsmntlist */
	uid_t mnt_owner;		/* current->uid at the time of add_vfsmnt() */
};
    技术分享



技术分享  

    至此,文件系统的安装就写完了。

Linux内核源代码情景分析-文件系统的安装

标签:

原文地址:http://blog.csdn.net/jltxgcy/article/details/44833227

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!