标签:
执行sudo mount -t ext2 /dev/sdb1 /mnt/sdb,将文件系统挂载到/mnt/sdb上。系统调用mount,映射到内核层执行的是sys_mount。假设/dev/sdb1和/mnt/sdb都位于ext2文件系统中。
asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type, unsigned long flags, void * data)//dev_name指向了"/dev/sdb1",dir_name指向了"/mnt/sdb",type是表示文件系统类型(即格式)的字符串,如"ext2"、"iso9660",flags为安装模式,见下面 { int retval; unsigned long data_page; unsigned long type_page; unsigned long dev_page; char *dir_page; retval = copy_mount_options (type, &type_page);//将参数从用户空间复制到系统空间。这些参数值的长度均以一个页面为限,并且返回页面的起始地址 if (retval < 0) return retval; dir_page = getname(dir_name);//将参数从用户空间复制到系统空间,并返回指向该字符串的指针 retval = PTR_ERR(dir_page); if (IS_ERR(dir_page)) goto out1; retval = copy_mount_options (dev_name, &dev_page);//将参数从用户空间复制到系统空间。这些参数值的长度均以一个页面为限,并且返回页面的起始地址 if (retval < 0) goto out2; retval = copy_mount_options (data, &data_page);//将参数从用户空间复制到系统空间。这些参数值的长度均以一个页面为限,并且返回页面的起始地址 if (retval < 0) goto out3; lock_kernel(); retval = do_mount((char*)dev_page, dir_page, (char*)type_page, flags, (void*)data_page); unlock_kernel(); free_page(data_page); out3: free_page(dev_page); out2: putname(dir_page); out1: free_page(type_page); return retval; }其中flags为安装模式:
/*
 * Mount mode bits passed in the flags argument of mount.
 * Each directive must sit on its own line: when collapsed onto one line,
 * everything after the first #define becomes part of MS_RDONLY's
 * replacement text.
 */
#define MS_RDONLY	 1	/* Mount read-only */
#define MS_NOSUID	 2	/* Ignore suid and sgid bits */
#define MS_NODEV	 4	/* Disallow access to device special files */
#define MS_NOEXEC	 8	/* Disallow program execution */
#define MS_SYNCHRONOUS	16	/* Writes are synced at once */
#define MS_REMOUNT	32	/* Alter flags of a mounted FS */
#define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
#define MS_NOATIME	1024	/* Do not update access times. */
#define MS_NODIRATIME	2048	/* Do not update directory access times */
#define MS_BIND		4096

/*
 * Flags that can be altered by MS_REMOUNT
 */
#define MS_RMT_MASK	(MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|\
			 MS_SYNCHRONOUS|MS_MANDLOCK|MS_NOATIME|MS_NODIRATIME)

/*
 * Magic mount flag number. Has to be or-ed to the flag values.
 */
#define MS_MGC_VAL 0xC0ED0000	/* magic flag number to indicate "new" flags */
#define MS_MGC_MSK 0xffff0000	/* magic flag number mask */
所有的标志位都在低16位,而高16位则用作"magic_number"。
do_mount是整个过程的核心,代码如下:
/*
 * do_mount - core of the mount operation.
 * @dev_name:  device path string (may be NULL; checked only when non-NULL)
 * @dir_name:  mount point path string (must be a non-empty string)
 * @type_page: filesystem type name, required unless MS_REMOUNT or MS_BIND is set
 * @flags:     MS_* mount flags, optionally or-ed with the MS_MGC_VAL magic
 * @data_page: filesystem-specific data, handed on to do_remount() or get_sb_*()
 *
 * Returns 0 on success or a negative errno. MS_REMOUNT and MS_BIND requests
 * are delegated to do_remount() / do_loopback() and return their result.
 */
long do_mount(char * dev_name, char * dir_name, char *type_page, // in this walkthrough dev_name is "/dev/sdb1" and dir_name is "/mnt/sdb"
unsigned long flags, void *data_page)
{
struct file_system_type * fstype;
struct nameidata nd;
struct vfsmount *mnt = NULL;
struct super_block *sb;
int retval = 0;
/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
flags &= ~MS_MGC_MSK;
/* Basic sanity checks */
if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))// dir_name must be non-NULL, must not be the empty string, and must contain its terminating NUL within one page
return -EINVAL;
if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))// if dev_name is given, its terminating NUL must also lie within one page
return -EINVAL;
/* OK, looks good, now let's see what do they want */
/* just change the flags? - capabilities are checked in do_remount() */
if (flags & MS_REMOUNT)// remount path, not covered in this walkthrough
return do_remount(dir_name, flags & ~MS_REMOUNT,
(char *) data_page);
/* "mount --bind"? Equivalent to older "mount -t bind" */
/* No capabilities? What if users do thousands of these? */
if (flags & MS_BIND)// bind-mount path, not covered in this walkthrough
return do_loopback(dev_name, dir_name);
/* For the rest we need the type */
if (!type_page || !memchr(type_page, 0, PAGE_SIZE))// type_page must be non-NULL and NUL-terminated within one page
return -EINVAL;
#if 0 /* Can be deleted again. Introduced in patch-2.3.99-pre6 */
/* loopback mount? This is special - requires fewer capabilities */
if (strcmp(type_page, "bind")==0)
return do_loopback(dev_name, dir_name);
#endif
/* for the rest we _really_ need capabilities... */
if (!capable(CAP_SYS_ADMIN))// permission check
return -EPERM;
/* ... filesystem driver... */
fstype = get_fs_type(type_page);// look up the file_system_type structure by filesystem type name
if (!fstype)
return -ENODEV;
/* ... and mountpoint. Do the lookup first to force automounting. */
if (path_init(dir_name,
LOOKUP_FOLLOW|LOOKUP_POSITIVE|LOOKUP_DIRECTORY, &nd))
retval = path_walk(dir_name, &nd);// resolve the mount point (/mnt/sdb here); nd.mnt and nd.dentry are used below
if (retval)
goto fs_out;
/* get superblock, locks mount_sem on success */
if (fstype->fs_flags & FS_NOMOUNT)
sb = ERR_PTR(-EINVAL);
else if (fstype->fs_flags & FS_REQUIRES_DEV)// ext2 is declared with FS_REQUIRES_DEV, so this branch is taken
sb = get_sb_bdev(fstype, dev_name, flags, data_page);// read the device's on-disk superblock and build the in-memory super_block from it
else if (fstype->fs_flags & FS_SINGLE)
sb = get_sb_single(fstype, flags, data_page);
else
sb = get_sb_nodev(fstype, flags, data_page);
retval = PTR_ERR(sb);
if (IS_ERR(sb))
goto dput_out;
/* Something was mounted here while we slept */
while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry))// while this dentry is itself a mount point, step down into what is mounted on it
;
/* Refuse the same filesystem on the same mount point */
retval = -EBUSY;
if (nd.mnt && nd.mnt->mnt_sb == sb
&& nd.mnt->mnt_root == nd.dentry)
goto fail;
retval = -ENOENT;
if (!nd.dentry->d_inode)
goto fail;
down(&nd.dentry->d_inode->i_zombie);
if (!IS_DEADDIR(nd.dentry->d_inode)) {
retval = -ENOMEM;
mnt = add_vfsmnt(&nd, sb->s_root, dev_name);// link the new super_block's root to the mount point's dentry through a vfsmount
}
up(&nd.dentry->d_inode->i_zombie);
if (!mnt)
goto fail;
retval = 0;
/* Common exit path: each label releases one more resource than the next. */
unlock_out:
up(&mount_sem);
dput_out:
path_release(&nd);
fs_out:
put_filesystem(fstype);
return retval;
/* Failure after the superblock was obtained: drop it if nothing else has it mounted. */
fail:
if (list_empty(&sb->s_mounts))
kill_super(sb, 0);
goto unlock_out;
}
系统支持的每一种文件系统都有一个file_system_type数据结构,他们都会链入file_systems结构。
/*
 * Describes one filesystem type. Instances are chained through `next`.
 */
struct file_system_type {
	const char *name;		/* type name, e.g. "ext2" */
	int fs_flags;			/* FS_* flags, e.g. FS_REQUIRES_DEV */
	struct super_block *(*read_super) (struct super_block *, void *, int);
	struct module *owner;
	struct vfsmount *kern_mnt; /* For kernel mount, if it's FS_SINGLE fs */
	struct file_system_type * next;
};
对于ext2文件系统来说:
static DECLARE_FSTYPE_DEV(ext2_fs_type, "ext2", ext2_read_super);
#define DECLARE_FSTYPE_DEV(var,type,read) DECLARE_FSTYPE(var,type,read,FS_REQUIRES_DEV)
#define DECLARE_FSTYPE(var,type,read,flags) struct file_system_type var = { name: type, read_super: read, fs_flags: flags, owner: THIS_MODULE, }我们再看看其他文件系统:
static DECLARE_FSTYPE(pipe_fs_type, "pipefs", pipefs_read_super, FS_NOMOUNT|FS_SINGLE);
static DECLARE_FSTYPE(ramfs_fs_type, "ramfs", ramfs_read_super, FS_LITTER);get_fs_type根据文件系统类型名,找到对应的file_system_type结构,代码如下:
struct file_system_type *get_fs_type(const char *name) { struct file_system_type *fs; read_lock(&file_systems_lock); fs = *(find_filesystem(name));//根据name在file_systems中寻找file_system_type结构 if (fs && !try_inc_mod_count(fs->owner)) fs = NULL; read_unlock(&file_systems_lock); if (!fs && (request_module(name) == 0)) { read_lock(&file_systems_lock); fs = *(find_filesystem(name)); if (fs && !try_inc_mod_count(fs->owner)) fs = NULL; read_unlock(&file_systems_lock); } return fs; }
/*
 * find_filesystem - locate the list link for the filesystem called @name.
 * @name: filesystem type name to compare against each entry's ->name
 *
 * Walks the file_systems list and returns the address of the link that
 * points at the matching entry. When no entry matches, the returned link
 * is the terminating one, so dereferencing it yields NULL.
 */
static struct file_system_type **find_filesystem(const char *name)
{
	struct file_system_type **p;

	for (p = &file_systems; *p; p = &(*p)->next)
		if (strcmp((*p)->name, name) == 0)
			break;
	return p;
}
返回到do_mount,执行get_sb_bdev,要把待安装设备的"超级块"读进来并根据超级块中的信息在内存中建立起相应的super_block数据结构,代码如下:
static struct super_block *get_sb_bdev(struct file_system_type *fs_type, char *dev_name, int flags, void * data) { struct inode *inode; struct block_device *bdev; struct block_device_operations *bdops; struct super_block * sb; struct nameidata nd; kdev_t dev; int error = 0; /* What device it is? */ if (!dev_name || !*dev_name) return ERR_PTR(-EINVAL); if (path_init(dev_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd)) error = path_walk(dev_name, &nd);//找到/mnt/sdb1节点的mnt结构和dentry结构 if (error) return ERR_PTR(error); inode = nd.dentry->d_inode; error = -ENOTBLK; if (!S_ISBLK(inode->i_mode))//一定要是设备节点 goto out; error = -EACCES; if (IS_NODEV(inode)) goto out; bdev = inode->i_bdev;//得到block_device结构 bdops = devfs_get_ops ( devfs_get_handle_from_inode (inode) ); if (bdops) bdev->bd_op = bdops; /* Done with lookups, semaphore down */ down(&mount_sem); dev = to_kdev_t(bdev->bd_dev);//设备号 sb = get_super(dev);//根据设备号在super_blocks中查找super_block结构 if (sb) {//如果有就返回 if (fs_type == sb->s_type && ((flags ^ sb->s_flags) & MS_RDONLY) == 0) { path_release(&nd); return sb; } } else {//如果没有 mode_t mode = FMODE_READ; /* we always need it ;-) */ if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; error = blkdev_get(bdev, mode, 0, BDEV_FS); if (error) goto out; check_disk_change(dev); error = -EACCES; if (!(flags & MS_RDONLY) && is_read_only(dev)) goto out1; error = -EINVAL; sb = read_super(dev, bdev, fs_type, flags, data, 0); if (sb) { get_filesystem(fs_type); path_release(&nd); return sb; } out1: blkdev_put(bdev, BDEV_FS); } out: path_release(&nd); up(&mount_sem); return ERR_PTR(error); }path_walk,和Linux内核源代码情景分析-从路径名到目标节点,一文中的区别在于:
for (block = 0; block < EXT2_N_BLOCKS; block++) inode->u.ext2_i.i_data[block] = raw_inode->i_block[block]; if (inode->i_ino == EXT2_ACL_IDX_INO || inode->i_ino == EXT2_ACL_DATA_INO) /* Nothing to do */ ; else if (S_ISREG(inode->i_mode)) { inode->i_op = &ext2_file_inode_operations; inode->i_fop = &ext2_file_operations; inode->i_mapping->a_ops = &ext2_aops; } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; } else if (S_ISLNK(inode->i_mode)) { if (!inode->i_blocks) inode->i_op = &ext2_fast_symlink_inode_operations; else { inode->i_op = &page_symlink_inode_operations; inode->i_mapping->a_ops = &ext2_aops; } } else //会执行这里 init_special_inode(inode, inode->i_mode, le32_to_cpu(raw_inode->i_block[0]));//i_block[0]是设备号还记得在Linux内核源代码情景分析-从路径名到目标节点,一文中:
bh = ext2_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &de);//根据父节点的inode结构中inode->u.ext2_i.i_data找到对应的目录项在上面的片段中:
for (block = 0; block < EXT2_N_BLOCKS; block++) inode->u.ext2_i.i_data[block] = raw_inode->i_block[block];所以总结raw_inode->i_block[block],如果得到是目录节点的inode,那么i_block[]存储着目录项的位置。如果是文件节点的inode,那么i_block[]存储着真正数据的位置,现在设备节点的inode存储着设备号(包含了主设备号和次设备号)。
/*
 * init_special_inode - set up an inode that is neither file, directory nor symlink.
 * @inode: inode to initialise
 * @mode:  file mode; its type bits select the branch
 * @rdev:  raw device number, used for character and block devices
 *
 * Stores @mode and installs the default file operations for the node type.
 * Block devices additionally get their block_device attached via bdget().
 * An unrecognised type only produces a debug message.
 */
void init_special_inode(struct inode *inode, umode_t mode, int rdev)
{
	inode->i_mode = mode;
	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = to_kdev_t(rdev);
	} else if (S_ISBLK(mode)) {
		/* Block device: also look up or create its block_device. */
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = to_kdev_t(rdev);
		inode->i_bdev = bdget(rdev);
	} else if (S_ISFIFO(mode))
		inode->i_fop = &def_fifo_fops;
	else if (S_ISSOCK(mode))
		inode->i_fop = &bad_sock_fops;
	else
		printk(KERN_DEBUG "init_special_inode: bogus imode (%o)\n", mode);
}
/*
 * bdget - find or create the block_device for device number @dev.
 * @dev: device number
 *
 * Returns the entry found in the hash bucket for @dev, or a newly
 * allocated one with bd_count set to 1, or NULL if allocation fails.
 * The allocation happens with bdev_lock dropped, so the bucket is searched
 * a second time before inserting; if an entry has appeared meanwhile, the
 * fresh allocation is destroyed and the existing entry is returned.
 */
struct block_device *bdget(dev_t dev)
{
	struct list_head * head = bdev_hashtable + hash(dev);
	struct block_device *bdev, *new_bdev;

	spin_lock(&bdev_lock);
	bdev = bdfind(dev, head);
	spin_unlock(&bdev_lock);
	if (bdev)
		return bdev;

	new_bdev = alloc_bdev();	/* allocate a block_device structure */
	if (!new_bdev)
		return NULL;
	atomic_set(&new_bdev->bd_count,1);
	new_bdev->bd_dev = dev;		/* device number */
	new_bdev->bd_op = NULL;

	spin_lock(&bdev_lock);
	bdev = bdfind(dev, head);	/* search the hash bucket again */
	if (!bdev) {
		/* Still absent: insert ours so later lookups find it. */
		list_add(&new_bdev->bd_hash, head);
		spin_unlock(&bdev_lock);
		return new_bdev;
	}
	spin_unlock(&bdev_lock);
	destroy_bdev(new_bdev);
	return bdev;
}
返回到get_sb_bdev,继续执行,如果get_super获取不到super_block结构,那么首先执行blkdev_get,设置bdev->bd_op,代码如下:
int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind) { int ret = -ENODEV; kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */ down(&bdev->bd_sem); if (!bdev->bd_op) bdev->bd_op = get_blkfops(MAJOR(rdev));//bdev->bd_op ...... return ret; }
/*
 * get_blkfops - return the block_device_operations registered for @major.
 * @major: major device number
 *
 * Returns blkdevs[major].bdops for a non-zero major below MAX_BLKDEV,
 * otherwise NULL. With CONFIG_KMOD, an empty slot first triggers a
 * request_module("block-major-N") before the slot is read.
 */
const struct block_device_operations * get_blkfops(unsigned int major)
{
	const struct block_device_operations *ret = NULL;

	/* major 0 is used for non-device mounts */
	if (major && major < MAX_BLKDEV) {
#ifdef CONFIG_KMOD
		if (!blkdevs[major].bdops) {
			char name[20];
			sprintf(name, "block-major-%d", major);
			request_module(name);
		}
#endif
		ret = blkdevs[major].bdops;
	}
	return ret;
}
然后执行关键的read_super,根据设备号把设备的超级块从硬盘中读出来,代码如下:
static struct super_block * read_super(kdev_t dev, struct block_device *bdev, struct file_system_type *type, int flags, void *data, int silent) { struct super_block * s; s = get_empty_super(); if (!s) goto out; s->s_dev = dev;///设备号 s->s_bdev = bdev;//对应的block_device s->s_flags = flags; s->s_dirt = 0; sema_init(&s->s_vfs_rename_sem,1); sema_init(&s->s_nfsd_free_path_sem,1); s->s_type = type;//文件系统类型 sema_init(&s->s_dquot.dqio_sem, 1); sema_init(&s->s_dquot.dqoff_sem, 1); s->s_dquot.flags = 0; lock_super(s); if (!type->read_super(s, data, silent))//根据设备号,把文件系统的超级块从硬盘中读入 goto out_fail; unlock_super(s); /* tell bdcache that we are going to keep this one */ if (bdev) atomic_inc(&bdev->bd_count); out: return s; out_fail: s->s_dev = 0; s->s_bdev = 0; s->s_type = NULL; unlock_super(s); return NULL; }get_empty_super分配super_block结构,并链入super_blocks,以便下次能够查找到,代码如下:
struct super_block *get_empty_super(void) { struct super_block *s; for (s = sb_entry(super_blocks.next);//还是现在super_blocks里面查找 s != sb_entry(&super_blocks); s = sb_entry(s->s_list.next)) { if (s->s_dev) continue; if (!s->s_lock) return s; printk("VFS: empty superblock %p locked!\n", s); } /* Need a new one... */ if (nr_super_blocks >= max_super_blocks) return NULL; s = kmalloc(sizeof(struct super_block), GFP_USER);//分配super_block 结构 if (s) { nr_super_blocks++; memset(s, 0, sizeof(struct super_block)); INIT_LIST_HEAD(&s->s_dirty); list_add (&s->s_list, super_blocks.prev);//链入super_blocks init_waitqueue_head(&s->s_wait); INIT_LIST_HEAD(&s->s_files); INIT_LIST_HEAD(&s->s_mounts); } return s; }
static struct vfsmount *add_vfsmnt(struct nameidata *nd, struct dentry *root, const char *dev_name) { struct vfsmount *mnt; struct super_block *sb = root->d_inode->i_sb; char *name; mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL); if (!mnt) goto out; memset(mnt, 0, sizeof(struct vfsmount)); if (nd || dev_name) mnt->mnt_flags = MNT_VISIBLE; /* It may be NULL, but who cares? */ if (dev_name) { name = kmalloc(strlen(dev_name)+1, GFP_KERNEL); if (name) { strcpy(name, dev_name); mnt->mnt_devname = name; } } mnt->mnt_owner = current->uid; atomic_set(&mnt->mnt_count,1); mnt->mnt_sb = sb; spin_lock(&dcache_lock); if (nd && !IS_ROOT(nd->dentry) && d_unhashed(nd->dentry)) goto fail; mnt->mnt_root = dget(root); mnt->mnt_mountpoint = nd ? dget(nd->dentry) : dget(root);//挂载节点的dentry结构 mnt->mnt_parent = nd ? mntget(nd->mnt) : mnt; if (nd) { list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts); list_add(&mnt->mnt_clash, &nd->dentry->d_vfsmnt);//while(d_mountpoint(nd.dentry)会查找到 } else { INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_clash); } INIT_LIST_HEAD(&mnt->mnt_mounts); list_add(&mnt->mnt_instances, &sb->s_mounts); list_add(&mnt->mnt_list, vfsmntlist.prev); spin_unlock(&dcache_lock); out: return mnt; fail: spin_unlock(&dcache_lock); if (mnt->mnt_devname) kfree(mnt->mnt_devname); kfree(mnt); return NULL; }vfsmount结构如下:
/*
 * One vfsmount describes one mounted instance of a filesystem.
 */
struct vfsmount {
	struct dentry *mnt_mountpoint;	/* dentry of mountpoint */
	struct dentry *mnt_root;	/* root of the mounted tree */
	struct vfsmount *mnt_parent;	/* fs we are mounted on */
	struct list_head mnt_instances;	/* other vfsmounts of the same fs */
	struct list_head mnt_clash;	/* those who are mounted on (other */
					/* instances) of the same dentry */
	struct super_block *mnt_sb;	/* pointer to superblock */
	struct list_head mnt_mounts;	/* list of children, anchored here */
	struct list_head mnt_child;	/* and going through their mnt_child */
	atomic_t mnt_count;
	int mnt_flags;
	char *mnt_devname;		/* Name of device e.g. /dev/dsk/hda1 */
	struct list_head mnt_list;
	uid_t mnt_owner;
};
至此,文件系统的安装就写完了。
标签:
原文地址:http://blog.csdn.net/jltxgcy/article/details/44833227