标签:
执行sudo mount -t ext2 /dev/sdb1 /mnt/sdb,将文件系统挂载到/mnt/sdb上。系统调用mount,映射到内核层执行的是sys_mount。假设/dev/sdb1和/mnt/sdb都位于ext2文件系统中。
/*
 * sys_mount - kernel-side entry point of the mount system call.
 *
 * @dev_name: user-space pointer to the device path, e.g. "/dev/sdb1"
 * @dir_name: user-space pointer to the mount point path, e.g. "/mnt/sdb"
 * @type:     user-space pointer to the filesystem type name, e.g. "ext2"
 *            or "iso9660"
 * @flags:    mount mode bits (the MS_* values listed after the function)
 * @data:     user-space pointer to filesystem-specific mount options
 *
 * Copies each user-space argument into kernel memory, calls do_mount()
 * with lock_kernel() held, and then releases the copies in the reverse
 * order of acquisition.  Returns do_mount()'s result, or the negative
 * error of the copy step that failed.
 */
asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type,
			  unsigned long flags, void * data)
{
	int retval;
	unsigned long data_page;
	unsigned long type_page;
	unsigned long dev_page;
	char *dir_page;

	/*
	 * copy_mount_options() copies an argument from user space into
	 * kernel space.  Each argument is limited to one page and the
	 * start address of that page is returned through the second
	 * parameter.
	 */
	retval = copy_mount_options (type, &type_page);
	if (retval < 0)
		return retval;

	/* getname() copies the path from user space and returns a pointer to the copy. */
	dir_page = getname(dir_name);
	retval = PTR_ERR(dir_page);
	if (IS_ERR(dir_page))
		goto out1;

	retval = copy_mount_options (dev_name, &dev_page);
	if (retval < 0)
		goto out2;

	retval = copy_mount_options (data, &data_page);
	if (retval < 0)
		goto out3;

	lock_kernel();
	retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
			  flags, (void*)data_page);
	unlock_kernel();

	/* Unwind: each label frees what was acquired before the matching goto. */
	free_page(data_page);
out3:
	free_page(dev_page);
out2:
	putname(dir_page);
out1:
	free_page(type_page);
	return retval;
}
其中flags为安装模式:
#define MS_RDONLY	 1	/* Mount read-only */
#define MS_NOSUID	 2	/* Ignore suid and sgid bits */
#define MS_NODEV	 4	/* Disallow access to device special files */
#define MS_NOEXEC	 8	/* Disallow program execution */
#define MS_SYNCHRONOUS	16	/* Writes are synced at once */
#define MS_REMOUNT	32	/* Alter flags of a mounted FS */
#define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
#define MS_NOATIME	1024	/* Do not update access times. */
#define MS_NODIRATIME	2048	/* Do not update directory access times */
#define MS_BIND		4096

/*
 * Flags that can be altered by MS_REMOUNT
 */
#define MS_RMT_MASK	(MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|\
			MS_SYNCHRONOUS|MS_MANDLOCK|MS_NOATIME|MS_NODIRATIME)

/*
 * Magic mount flag number. Has to be or-ed to the flag values.
 */
#define MS_MGC_VAL 0xC0ED0000	/* magic flag number to indicate "new" flags */
#define MS_MGC_MSK 0xffff0000	/* magic flag number mask */
所有的标志位都在低16位,而高16位则用作"magic_number"。
do_mount是整个过程的核心,代码如下:
/*
 * do_mount - core of the mount operation; all arguments are kernel copies.
 *
 * dev_name:  device path (may be NULL)
 * dir_name:  mount point path (required, non-empty)
 * type_page: filesystem type name (required unless remounting or binding)
 * flags:     MS_* mount flags, optionally or-ed with the MS_MGC_VAL magic
 * data_page: filesystem-specific options, handed to the get_sb_* helpers
 *
 * Steps: strip the magic number, validate the strings, dispatch remount and
 * bind requests, check CAP_SYS_ADMIN, look up the filesystem type, resolve
 * the mount point, obtain the super_block according to fstype->fs_flags and
 * finally attach it to the mount point with add_vfsmnt().
 *
 * Returns 0 on success or a negative errno value.
 */
long do_mount(char * dev_name, char * dir_name, char *type_page, // in the running example dev_name is "/dev/sdb1" and dir_name is "/mnt/sdb"
unsigned long flags, void *data_page)
{
struct file_system_type * fstype;
struct nameidata nd;
struct vfsmount *mnt = NULL;
struct super_block *sb;
int retval = 0;
/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
flags &= ~MS_MGC_MSK;
/* Basic sanity checks */
if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))// dir_name must be non-NULL, non-empty and NUL-terminated within one page
return -EINVAL;
if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))// dev_name may be NULL; if present it must be NUL-terminated within one page
return -EINVAL;
/* OK, looks good, now let's see what do they want */
/* just change the flags? - capabilities are checked in do_remount() */
if (flags & MS_REMOUNT)// remount path, not covered by this walkthrough
return do_remount(dir_name, flags & ~MS_REMOUNT,
(char *) data_page);
/* "mount --bind"? Equivalent to older "mount -t bind" */
/* No capabilities? What if users do thousands of these? */
if (flags & MS_BIND)// bind-mount path, not covered by this walkthrough
return do_loopback(dev_name, dir_name);
/* For the rest we need the type */
if (!type_page || !memchr(type_page, 0, PAGE_SIZE))// type_page must be non-NULL and NUL-terminated within one page
return -EINVAL;
#if 0 /* Can be deleted again. Introduced in patch-2.3.99-pre6 */
/* loopback mount? This is special - requires fewer capabilities */
if (strcmp(type_page, "bind")==0)
return do_loopback(dev_name, dir_name);
#endif
/* for the rest we _really_ need capabilities... */
if (!capable(CAP_SYS_ADMIN))// permission check
return -EPERM;
/* ... filesystem driver... */
fstype = get_fs_type(type_page);// look up the file_system_type structure by its type name
if (!fstype)
return -ENODEV;
/* ... and mountpoint. Do the lookup first to force automounting. */
if (path_init(dir_name,
LOOKUP_FOLLOW|LOOKUP_POSITIVE|LOOKUP_DIRECTORY, &nd))
retval = path_walk(dir_name, &nd);// resolve the mount point (/mnt/sdb) into nd.mnt and nd.dentry
if (retval)
goto fs_out;
/* get superblock, locks mount_sem on success */
if (fstype->fs_flags & FS_NOMOUNT)
sb = ERR_PTR(-EINVAL);
else if (fstype->fs_flags & FS_REQUIRES_DEV)// ext2 is declared with FS_REQUIRES_DEV, so this branch is taken
sb = get_sb_bdev(fstype, dev_name, flags, data_page);// read the device's superblock and build the in-memory super_block from it
else if (fstype->fs_flags & FS_SINGLE)
sb = get_sb_single(fstype, flags, data_page);
else
sb = get_sb_nodev(fstype, flags, data_page);
retval = PTR_ERR(sb);
/* On failure skip unlock_out: per the comment above, mount_sem is only held on success. */
if (IS_ERR(sb))
goto dput_out;
/* Something was mounted here while we slept */
while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry))// keep descending while the dentry is itself a mount point
;
/* Refuse the same filesystem on the same mount point */
retval = -EBUSY;
if (nd.mnt && nd.mnt->mnt_sb == sb
&& nd.mnt->mnt_root == nd.dentry)
goto fail;
retval = -ENOENT;
if (!nd.dentry->d_inode)
goto fail;
down(&nd.dentry->d_inode->i_zombie);
/* If the directory is dead, mnt stays NULL and retval stays -ENOENT. */
if (!IS_DEADDIR(nd.dentry->d_inode)) {
retval = -ENOMEM;
mnt = add_vfsmnt(&nd, sb->s_root, dev_name);// tie the new filesystem's super_block to the mount point's dentry
}
up(&nd.dentry->d_inode->i_zombie);
if (!mnt)
goto fail;
retval = 0;
/* Common exit ladder: release mount_sem, the path lookup, then the fstype reference. */
unlock_out:
up(&mount_sem);
dput_out:
path_release(&nd);
fs_out:
put_filesystem(fstype);
return retval;
fail:
/* Drop the super_block only if no vfsmount is attached to it. */
if (list_empty(&sb->s_mounts))
kill_super(sb, 0);
goto unlock_out;
} 系统支持的每一种文件系统都有一个file_system_type数据结构,他们都会链入file_systems结构。
/* Describes one filesystem type known to the kernel. */
struct file_system_type {
const char *name; /* type name; find_filesystem() matches it with strcmp() */
int fs_flags; /* FS_* flags; do_mount() tests FS_NOMOUNT, FS_REQUIRES_DEV and FS_SINGLE */
struct super_block *(*read_super) (struct super_block *, void *, int); /* called by read_super() as (sb, data, silent); a NULL return is treated as failure */
struct module *owner; /* passed to try_inc_mod_count() by get_fs_type() */
struct vfsmount *kern_mnt; /* For kernel mount, if it's FS_SINGLE fs */
struct file_system_type * next; /* next entry in the file_systems list */
}; 对于ext2文件系统来说:static DECLARE_FSTYPE_DEV(ext2_fs_type, "ext2", ext2_read_super);
#define DECLARE_FSTYPE_DEV(var,type,read) DECLARE_FSTYPE(var,type,read,FS_REQUIRES_DEV)
#define DECLARE_FSTYPE(var,type,read,flags) struct file_system_type var = { name: type, read_super: read, fs_flags: flags, owner: THIS_MODULE, } 我们再看看其他文件系统:static DECLARE_FSTYPE(pipe_fs_type, "pipefs", pipefs_read_super, FS_NOMOUNT|FS_SINGLE);
static DECLARE_FSTYPE(ramfs_fs_type, "ramfs", ramfs_read_super, FS_LITTER);get_fs_type根据文件系统类型名,找到对应的file_system_type结构,代码如下:
/*
 * get_fs_type - look up a filesystem type by name and take a module reference.
 *
 * The list is searched under file_systems_lock.  If the first search finds
 * nothing usable and request_module(name) returns 0, the search is repeated
 * exactly once.  Returns the matching file_system_type, or NULL when none is
 * found or try_inc_mod_count() on its owner fails.
 */
struct file_system_type *get_fs_type(const char *name)
{
	struct file_system_type *found;
	int attempt;

	for (attempt = 0; ; attempt++) {
		read_lock(&file_systems_lock);
		found = *(find_filesystem(name));
		if (found && !try_inc_mod_count(found->owner))
			found = NULL;
		read_unlock(&file_systems_lock);

		/* Stop on success, after the single retry, or if the module request fails. */
		if (found || attempt > 0 || request_module(name) != 0)
			break;
	}
	return found;
}

/*
 * find_filesystem - walk the file_systems list looking for `name`.
 *
 * Returns the address of the link that points at the matching entry, or the
 * address of the terminating NULL link when no entry matches.
 */
static struct file_system_type **find_filesystem(const char *name)
{
	struct file_system_type **link = &file_systems;

	while (*link && strcmp((*link)->name, name) != 0)
		link = &(*link)->next;
	return link;
}
返回到do_mount,执行get_sb_bdev,要把待安装设备的"超级块"读进来并根据超级块中的信息在内存中建立起相应的super_block数据结构,代码如下:
static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
char *dev_name, int flags, void * data)
{
struct inode *inode;
struct block_device *bdev;
struct block_device_operations *bdops;
struct super_block * sb;
struct nameidata nd;
kdev_t dev;
int error = 0;
/* What device it is? */
if (!dev_name || !*dev_name)
return ERR_PTR(-EINVAL);
if (path_init(dev_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
error = path_walk(dev_name, &nd);//找到/mnt/sdb1节点的mnt结构和dentry结构
if (error)
return ERR_PTR(error);
inode = nd.dentry->d_inode;
error = -ENOTBLK;
if (!S_ISBLK(inode->i_mode))//一定要是设备节点
goto out;
error = -EACCES;
if (IS_NODEV(inode))
goto out;
bdev = inode->i_bdev;//得到block_device结构
bdops = devfs_get_ops ( devfs_get_handle_from_inode (inode) );
if (bdops) bdev->bd_op = bdops;
/* Done with lookups, semaphore down */
down(&mount_sem);
dev = to_kdev_t(bdev->bd_dev);//设备号
sb = get_super(dev);//根据设备号在super_blocks中查找super_block结构
if (sb) {//如果有就返回
if (fs_type == sb->s_type &&
((flags ^ sb->s_flags) & MS_RDONLY) == 0) {
path_release(&nd);
return sb;
}
} else {//如果没有
mode_t mode = FMODE_READ; /* we always need it ;-) */
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
error = blkdev_get(bdev, mode, 0, BDEV_FS);
if (error)
goto out;
check_disk_change(dev);
error = -EACCES;
if (!(flags & MS_RDONLY) && is_read_only(dev))
goto out1;
error = -EINVAL;
sb = read_super(dev, bdev, fs_type, flags, data, 0);
if (sb) {
get_filesystem(fs_type);
path_release(&nd);
return sb;
}
out1:
blkdev_put(bdev, BDEV_FS);
}
out:
path_release(&nd);
up(&mount_sem);
return ERR_PTR(error);
} path_walk,和Linux内核源代码情景分析-从路径名到目标节点,一文中的区别在于:for (block = 0; block < EXT2_N_BLOCKS; block++)
inode->u.ext2_i.i_data[block] = raw_inode->i_block[block];
if (inode->i_ino == EXT2_ACL_IDX_INO ||
inode->i_ino == EXT2_ACL_DATA_INO)
/* Nothing to do */ ;
else if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext2_file_inode_operations;
inode->i_fop = &ext2_file_operations;
inode->i_mapping->a_ops = &ext2_aops;
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &ext2_dir_inode_operations;
inode->i_fop = &ext2_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
if (!inode->i_blocks)
inode->i_op = &ext2_fast_symlink_inode_operations;
else {
inode->i_op = &page_symlink_inode_operations;
inode->i_mapping->a_ops = &ext2_aops;
}
} else //会执行这里
init_special_inode(inode, inode->i_mode,
le32_to_cpu(raw_inode->i_block[0]));//i_block[0]是设备号
还记得在Linux内核源代码情景分析-从路径名到目标节点,一文中:
bh = ext2_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &de);//根据父节点的inode结构中inode->u.ext2_i.i_data找到对应的目录项
在上面的片段中:
for (block = 0; block < EXT2_N_BLOCKS; block++)
inode->u.ext2_i.i_data[block] = raw_inode->i_block[block];
所以总结raw_inode->i_block[]的含义:如果是目录节点的inode,那么i_block[]存储着目录项的位置;如果是文件节点的inode,那么i_block[]存储着真正数据的位置;而对于设备节点的inode,i_block[0]存储着设备号(包含了主设备号和次设备号)。
/*
 * init_special_inode - set up an inode that is not a regular file,
 * directory or symlink.
 *
 * @inode: inode to initialise
 * @mode:  file mode; its type bits select the file operations
 * @rdev:  device number, used for character and block devices only
 *
 * Stores the mode, installs the default file operations for the inode type
 * and, for device nodes, records the device number.  A block device inode
 * additionally gets its block_device from bdget().  An unrecognised type
 * only produces a debug message.
 */
void init_special_inode(struct inode *inode, umode_t mode, int rdev)
{
	inode->i_mode = mode;

	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = to_kdev_t(rdev);
		return;
	}

	if (S_ISBLK(mode)) {
		/* Block device: also attach the block_device structure. */
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = to_kdev_t(rdev);
		inode->i_bdev = bdget(rdev);
		return;
	}

	if (S_ISFIFO(mode)) {
		inode->i_fop = &def_fifo_fops;
		return;
	}

	if (S_ISSOCK(mode)) {
		inode->i_fop = &bad_sock_fops;
		return;
	}

	printk(KERN_DEBUG "init_special_inode: bogus imode (%o)\n", mode);
}
/*
 * bdget - return the block_device for a device number, creating it if needed.
 *
 * @dev: device number
 *
 * Looks the device up in its hash bucket first.  If it is missing, a new
 * block_device is allocated and initialised outside the lock, then the
 * bucket is searched again under the lock before inserting: if an entry for
 * `dev` is found on that second search, the new structure is destroyed and
 * the existing one is returned instead.
 *
 * Returns the block_device, or NULL if allocation fails.
 */
struct block_device *bdget(dev_t dev)
{
	struct list_head *bucket = bdev_hashtable + hash(dev);
	struct block_device *existing;
	struct block_device *fresh;

	spin_lock(&bdev_lock);
	existing = bdfind(dev, bucket);
	spin_unlock(&bdev_lock);
	if (existing != NULL)
		return existing;

	fresh = alloc_bdev();
	if (fresh == NULL)
		return NULL;
	atomic_set(&fresh->bd_count, 1);
	fresh->bd_dev = dev;		/* device number */
	fresh->bd_op = NULL;

	spin_lock(&bdev_lock);
	existing = bdfind(dev, bucket);
	if (existing == NULL)
		list_add(&fresh->bd_hash, bucket);	/* so the next lookup finds it */
	spin_unlock(&bdev_lock);

	if (existing == NULL)
		return fresh;

	destroy_bdev(fresh);
	return existing;
}
返回到get_sb_bdev,继续执行,如果get_super获取不到super_block结构,那么首先执行blkdev_get,设置bdev->bd_op,代码如下:
/*
 * blkdev_get - excerpt showing how bdev->bd_op is filled in.
 *
 * Takes bdev->bd_sem and, if the block_device has no operations table yet,
 * fetches one for the device's major number via get_blkfops().  The rest of
 * the function is elided in this excerpt.
 */
int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
{
int ret = -ENODEV;
kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
down(&bdev->bd_sem);
if (!bdev->bd_op)
bdev->bd_op = get_blkfops(MAJOR(rdev));// set bdev->bd_op from the major number's registered operations
/* (remainder of the function omitted in this excerpt) */
......
return ret;
}const struct block_device_operations * get_blkfops(unsigned int major)
{
/*
 * get_blkfops - return the operations table registered in blkdevs[] for a
 * block major number, or NULL when major is 0, out of range, or has no
 * table.  With CONFIG_KMOD, a missing table first triggers
 * request_module("block-major-<major>").
 */
const struct block_device_operations *ret = NULL;
/* major 0 is used for non-device mounts */
if (major && major < MAX_BLKDEV) {
#ifdef CONFIG_KMOD
if (!blkdevs[major].bdops) {
char name[20];
sprintf(name, "block-major-%d", major);
request_module(name);
}
#endif
ret = blkdevs[major].bdops;
}
return ret;
}
然后执行关键的read_super,根据设备号把设备的超级块从硬盘中读出来,代码如下:
/*
 * read_super - set up a super_block for a device and let the filesystem
 * fill it in.
 *
 * Obtains a super_block from get_empty_super(), initialises its generic
 * fields and semaphores, then calls type->read_super() with the super_block
 * locked.  On success an extra reference is taken on bdev (if any).
 *
 * Returns the super_block, or NULL if none could be obtained or the
 * filesystem's read_super callback failed.
 */
static struct super_block * read_super(kdev_t dev, struct block_device *bdev,
struct file_system_type *type, int flags,
void *data, int silent)
{
struct super_block * s;
s = get_empty_super();
if (!s)
goto out;
s->s_dev = dev;// device number
s->s_bdev = bdev;// the matching block_device
s->s_flags = flags;
s->s_dirt = 0;
sema_init(&s->s_vfs_rename_sem,1);
sema_init(&s->s_nfsd_free_path_sem,1);
s->s_type = type;// filesystem type
sema_init(&s->s_dquot.dqio_sem, 1);
sema_init(&s->s_dquot.dqoff_sem, 1);
s->s_dquot.flags = 0;
lock_super(s);
if (!type->read_super(s, data, silent))// filesystem-specific callback (ext2_read_super for ext2) reads the on-disk superblock
goto out_fail;
unlock_super(s);
/* tell bdcache that we are going to keep this one */
if (bdev)
atomic_inc(&bdev->bd_count);
out:
return s;
out_fail:
/* Clearing s_dev marks the entry as unused again (see the s_dev test in get_empty_super). */
s->s_dev = 0;
s->s_bdev = 0;
s->s_type = NULL;
unlock_super(s);
return NULL;
} get_empty_super分配super_block结构,并链入super_blocks,以便下次能够查找到,代码如下:struct super_block *get_empty_super(void)
{
/*
 * get_empty_super - return an unused super_block.
 *
 * First scans the super_blocks list for an entry whose s_dev is 0 and which
 * is not locked.  If none exists and the max_super_blocks limit is not
 * reached, a new zeroed entry is allocated, its list heads and wait queue
 * are initialised, and it is linked into super_blocks.  Returns NULL when
 * the limit is reached or allocation fails.
 */
struct super_block *s;
for (s = sb_entry(super_blocks.next);// search the existing super_blocks list first
s != sb_entry(&super_blocks);
s = sb_entry(s->s_list.next)) {
if (s->s_dev)
continue;
if (!s->s_lock)
return s;
printk("VFS: empty superblock %p locked!\n", s);
}
/* Need a new one... */
if (nr_super_blocks >= max_super_blocks)
return NULL;
s = kmalloc(sizeof(struct super_block), GFP_USER);// allocate a new super_block
if (s) {
nr_super_blocks++;
memset(s, 0, sizeof(struct super_block));
INIT_LIST_HEAD(&s->s_dirty);
list_add (&s->s_list, super_blocks.prev);// link it into super_blocks
init_waitqueue_head(&s->s_wait);
INIT_LIST_HEAD(&s->s_files);
INIT_LIST_HEAD(&s->s_mounts);
}
return s;
}static struct vfsmount *add_vfsmnt(struct nameidata *nd,
struct dentry *root,
const char *dev_name)
{
/*
 * add_vfsmnt - create a vfsmount that attaches the filesystem rooted at
 * `root` to the mount point described by `nd`.
 *
 * nd:       mount point (its dentry and parent vfsmount); may be NULL, in
 *           which case the new vfsmount is its own parent and `root` is
 *           used as the mount point
 * root:     root dentry of the filesystem being mounted
 * dev_name: device name to record; may be NULL
 *
 * Returns the new vfsmount, or NULL if allocation fails or the mount point
 * dentry is a non-root dentry that has been unhashed.
 */
struct vfsmount *mnt;
struct super_block *sb = root->d_inode->i_sb;
char *name;
mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
if (!mnt)
goto out;
memset(mnt, 0, sizeof(struct vfsmount));
if (nd || dev_name)
mnt->mnt_flags = MNT_VISIBLE;
/* It may be NULL, but who cares? */
if (dev_name) {
name = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
if (name) {
strcpy(name, dev_name);
mnt->mnt_devname = name;
}
}
mnt->mnt_owner = current->uid;
atomic_set(&mnt->mnt_count,1);
mnt->mnt_sb = sb;
spin_lock(&dcache_lock);
if (nd && !IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
goto fail;
mnt->mnt_root = dget(root);
mnt->mnt_mountpoint = nd ? dget(nd->dentry) : dget(root);// dentry of the mount point
mnt->mnt_parent = nd ? mntget(nd->mnt) : mnt;
if (nd) {
list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
list_add(&mnt->mnt_clash, &nd->dentry->d_vfsmnt);// presumably what the d_mountpoint(nd.dentry) loop in do_mount detects; confirm against d_mountpoint()
} else {
INIT_LIST_HEAD(&mnt->mnt_child);
INIT_LIST_HEAD(&mnt->mnt_clash);
}
INIT_LIST_HEAD(&mnt->mnt_mounts);
list_add(&mnt->mnt_instances, &sb->s_mounts);
list_add(&mnt->mnt_list, vfsmntlist.prev);
spin_unlock(&dcache_lock);
out:
return mnt;
fail:
/* Undo the allocations made above; nothing has been linked into any list yet. */
spin_unlock(&dcache_lock);
if (mnt->mnt_devname)
kfree(mnt->mnt_devname);
kfree(mnt);
return NULL;
} vfsmount结构如下:struct vfsmount
{
/* One instance per mounted filesystem; created and linked by add_vfsmnt(). */
struct dentry *mnt_mountpoint; /* dentry of mountpoint */
struct dentry *mnt_root; /* root of the mounted tree */
struct vfsmount *mnt_parent; /* fs we are mounted on */
struct list_head mnt_instances; /* other vfsmounts of the same fs */
struct list_head mnt_clash; /* those who are mounted on (other */
/* instances) of the same dentry */
struct super_block *mnt_sb; /* pointer to superblock */
struct list_head mnt_mounts; /* list of children, anchored here */
struct list_head mnt_child; /* and going through their mnt_child */
atomic_t mnt_count; /* set to 1 by add_vfsmnt() */
int mnt_flags; /* add_vfsmnt() sets MNT_VISIBLE when nd or dev_name is given */
char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
struct list_head mnt_list; /* link in vfsmntlist */
uid_t mnt_owner; /* current->uid at the time add_vfsmnt() ran */
};
至此,文件系统的安装就写完了。
标签:
原文地址:http://blog.csdn.net/jltxgcy/article/details/44833227