标签:
在下面几种情况下会发生页面出错异常(也叫缺页中断):
1、相应的页面目录项或者页面表项为空,也就是该线性地址与物理地址的映射关系尚未建立,或者已经撤销。
2、相应的物理页面不在内存中。 本文讨论的就是这种情况。
3、指令中规定的访问方式与页面的权限不符,例如企图写一个“只读”的页面。
假设已经建立好了映射,但是页表项的最低位(P位)为0,表示页面不在内存中。此时整个页表项的结构如下图所示:offset表示页面在磁盘设备上的位置,也就是磁盘设备的逻辑页面号;type则指明该页面在哪一个磁盘设备中。
图 1 页面交换项结构
这里假定CPU的运行已经到达了页面异常服务程序的主体do_page_fault()的入口处。
代码如下: arch/i386/mm/fault.c
/*
 * do_page_fault - page-fault exception handler (excerpt; the "...." lines
 * mark code that the article left out).
 *
 * regs:       register snapshot handed over by the exception entry path
 * error_code: fault reason; only (error_code & 3) is decoded in this excerpt
 *
 * Flow visible here: read the faulting address from CR2, look up the vma for
 * it, check the access against the vma flags, then let handle_mm_fault()
 * resolve the fault and account it as a minor or a major fault.
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct * vma;
unsigned long address;
unsigned long page;
unsigned long fixup;
int write;
siginfo_t info;
/* get the address */
__asm__("movl %%cr2,%0":"=r" (address));// copy CR2 (the address whose translation failed) into 'address'
tsk = current;// task_struct of the current process
/*
* We fault-in kernel-space virtual memory on-demand. The
* 'reference' page table is init_mm.pgd.
*
* NOTE! We MUST NOT take any locks for this case. We may
* be in an interrupt or a critical region, and should
* only copy the information from the master page table,
* nothing more.
*/
if (address >= TASK_SIZE)
goto vmalloc_fault;
mm = tsk->mm;// mm_struct of the current process
info.si_code = SEGV_MAPERR;
/*
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
if (in_interrupt() || !mm)
goto no_context;
down(&mm->mmap_sem);
vma = find_vma(mm, address);// per the article: returns the first area whose end address is above 'address'
if (!vma)// nothing found: no area ends above the given address
goto bad_area;
if (vma->vm_start <= address)// the area starts at or below 'address', so a mapping exists; go to good_area to find out why the access failed
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
....
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
good_area:
info.si_code = SEGV_ACCERR;
write = 0;
switch (error_code & 3) {// in the scenario traced by the article: binary 110 & 011 = 2
default: /* 3: write, present */
#ifdef TEST_VERIFY_AREA
if (regs->cs == KERNEL_CS)
printk("WP fault at %08lx\n", regs->eip);
#endif
/* fall through */
case 2: /* write, not present */
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
write++;// the traced scenario (write to a non-present page) reaches this line
break;
case 1: /* read, present */
goto bad_area;
case 0: /* read, not present */
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
}
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
* the fault.
*
* Return value mapping used below: 1 -> counted as a minor fault,
* 2 -> counted as a major fault, 0 -> do_sigbus, anything else ->
* out_of_memory.
*/
switch (handle_mm_fault(mm, vma, address, write)) {
case 1:
tsk->min_flt++;
break;
case 2:
tsk->maj_flt++;
break;
case 0:
goto do_sigbus;
default:
goto out_of_memory;
}
/*
* Did it hit the DOS screen memory VA from vm86 mode?
*/
if (regs->eflags & VM_MASK) {
unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
if (bit < 32)
tsk->thread.screen_bitmap |= 1 << bit;
}
up(&mm->mmap_sem);
return;
.......
}内核的中断/异常响应机制还传过来两个参数。一个是pt_regs结构指针regs,它指向例外发生前夕CPU中各寄存器内容的一份副本。而error_code则进一步指明映射失败的具体原因。
error_code:
bit 0 == 0 means no page found, 1 means protection fault
bit 1 == 0 means read, 1 means write
bit 2 == 0 means kernel, 1 means user-mode
此时,error_code为110(二进制):bit 2为1表示用户态,bit 1为1表示写操作,bit 0为0表示页面不在内存中。
handle_mm_fault函数,代码如下:
/*
 * handle_mm_fault - obtain the page-table levels for 'address' and pass the
 * final entry to handle_pte_fault().
 *
 * Returns whatever handle_pte_fault() returns, or -1 when pmd_alloc() or
 * pte_alloc() yields NULL.
 */
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
unsigned long address, int write_access)
{
int ret = -1;
pgd_t *pgd;
pmd_t *pmd;
pgd = pgd_offset(mm, address);// pointer to the page-directory entry for 'address'
pmd = pmd_alloc(pgd, address);// middle level; per the article this is just a pass-through here and still refers to the page-directory entry
if (pmd) {
pte_t * pte = pte_alloc(pmd, address);// pointer to the page-table entry
if (pte)
ret = handle_pte_fault(mm, vma, address, write_access, pte);
}
return ret;
}static inline int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct * vma, unsigned long address,
int write_access, pte_t * pte)
{
/*
* handle_pte_fault - resolve a fault given the page-table entry.
*
* Not present and empty     -> do_no_page()
* Not present but non-empty -> do_swap_page()
* Present, write access to a non-writable entry -> do_wp_page()
* Otherwise mark the entry young (and dirty on a write), install it
* with establish_pte() and return 1.
*/
pte_t entry;
/*
* We need the page table lock to synchronize with kswapd
* and the SMP-safe atomic PTE updates.
*/
spin_lock(&mm->page_table_lock);
entry = *pte;// snapshot of the page-table entry contents
if (!pte_present(entry)) {// the page is not in memory
/*
* If it truly wasn't present, we know that kswapd
* and the PTE updates will not touch it later. So
* drop the lock.
*/
spin_unlock(&mm->page_table_lock);
if (pte_none(entry))// entry is empty (no mapping was set up); in the traced scenario the entry is NOT empty, so this branch is skipped
return do_no_page(mm, vma, address, write_access, pte);
return do_swap_page(mm, vma, address, pte, pte_to_swp_entry(entry), write_access);// the traced scenario goes here: the entry is converted to a swap entry
}
if (write_access) {
if (!pte_write(entry))
return do_wp_page(mm, vma, address, pte, entry);
entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
establish_pte(vma, address, pte, entry);
spin_unlock(&mm->page_table_lock);
return 1;
}do_swap_page函数,如下:
/*
 * do_swap_page - bring a swapped-out page back and map it at 'address'.
 *
 * Looks the page up in the swap cache first; on a miss it triggers
 * read-ahead and then reads the page itself under the kernel lock.
 * Returns 1 on success and -1 when read_swap_cache() yields no page.
 */
static int do_swap_page(struct mm_struct * mm,
struct vm_area_struct * vma, unsigned long address,
pte_t * page_table, swp_entry_t entry, int write_access)
{
struct page *page = lookup_swap_cache(entry);// search the swap cache (hash table) first
pte_t pte;
if (!page) {
lock_kernel();
swapin_readahead(entry);// read ahead neighbouring pages
page = read_swap_cache(entry);// actually obtain the page: either found in the hash table thanks to the read-ahead above, or freshly allocated and filled from disk
unlock_kernel();
if (!page)
return -1;
flush_page_to_ram(page);
flush_icache_page(vma, page);
}
mm->rss++;
pte = mk_pte(page, vma->vm_page_prot);// build the page-table entry from the page and the vma protection bits
/*
* Freeze the "shared"ness of the page, ie page_count + swap_count.
* Must lock page before transferring our swap count to already
* obtained page count.
*/
lock_page(page);
swap_free(entry);
if (write_access && !is_page_shared(page))
pte = pte_mkwrite(pte_mkdirty(pte));// write fault on an unshared page: mark the entry dirty and writable
UnlockPage(page);
set_pte(page_table, pte);// install the entry (attributes set above) so it points at the page
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte);
return 1; /* Minor fault */
}一、下面分别解释各个函数。首先解释swapin_readahead函数,如下:
/*
 * swapin_readahead - start asynchronous reads for a run of swap slots
 * around 'entry' (excerpt; the "......" line marks code the article left
 * out).
 *
 * valid_swaphandles() supplies the number of slots and the starting offset.
 * Each slot is read with read_swap_cache_async(..., 0) and the reference
 * returned for it is dropped right away.
 */
void swapin_readahead(swp_entry_t entry)
{
int i, num;
struct page *new_page;
unsigned long offset;
/*
* Get the number of handles we should do readahead io to. Also,
* grab temporary references on them, releasing them as io completes.
*/
num = valid_swaphandles(entry, &offset);
for (i = 0; i < num; offset++, i++) {
......
new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset), 0);
if (new_page != NULL)
page_cache_release(new_page);// drop one reference on the page
swap_free(SWP_ENTRY(SWP_TYPE(entry), offset));
}
return;
}提前预读相邻的盘面,根据下面的描述,__get_free_page,page使用计数为1,add_to_swap_cache,page使用计数再加1;此时page_cache_release,page使用计数又变成了1。直到有进程认领,才变成2。
/*
 * read_swap_cache_async - return the page for swap entry 'entry', reading
 * it from swap if it is not in the swap cache yet.
 *
 * entry: swap entry identifying the page
 * wait:  passed straight through to rw_swap_page()
 *
 * Returns the page found in the swap cache, a newly allocated page that has
 * been added to the cache and submitted for reading, or 0 when
 * swap_duplicate() fails or no free page is available.
 *
 * This copy is annotated for the read-ahead case, where the page is not in
 * the cache yet.
 */
struct page * read_swap_cache_async(swp_entry_t entry, int wait)
{
struct page *found_page = 0, *new_page;
unsigned long new_page_addr;
/*
* Make sure the swap entry is still in use.
*/
if (!swap_duplicate(entry)) /* Account for the swap cache */
goto out;
/*
* Look for the page in the swap cache.
*/
found_page = lookup_swap_cache(entry);// assume it is not found
if (found_page)
goto out_free_swap;
new_page_addr = __get_free_page(GFP_USER);// per the article: a freshly allocated page has a use count of 1
if (!new_page_addr)
goto out_free_swap; /* Out of memory */
new_page = virt_to_page(new_page_addr);// convert the address to the matching struct page pointer
/*
* Check the swap cache again, in case we stalled above.
*/
found_page = lookup_swap_cache(entry);// assume it is still not found
if (found_page)
goto out_free_page;
/*
* Add it to the swap cache and read its contents.
*/
lock_page(new_page);
add_to_swap_cache(new_page, entry);// link the page into the swap-cache lists
rw_swap_page(READ, new_page, wait);// the real disk read into the new page (the article defers the details to its block-device chapter)
return new_page;
out_free_page:
page_cache_release(new_page);
out_free_swap:
swap_free(entry);
out:
return found_page;
}
/*
 * add_to_swap_cache - insert a locked page into the swap cache.
 *
 * BUG()s if the page is not locked, already has its swap-cache flag set, or
 * already has a mapping. Clears PG_error and PG_arch_1, sets PG_uptodate,
 * then adds the page to swapper_space using entry.val as the index.
 */
void add_to_swap_cache(struct page *page, swp_entry_t entry)
{
unsigned long flags;
#ifdef SWAP_CACHE_INFO
swap_cache_add_total++;
#endif
if (!PageLocked(page))
BUG();
if (PageTestandSetSwapCache(page))
BUG();
if (page->mapping)
BUG();
flags = page->flags & ~((1 << PG_error) | (1 << PG_arch_1));
page->flags = flags | (1 << PG_uptodate);
add_to_page_cache_locked(page, &swapper_space, entry.val);
}add_to_page_cache_locked函数,代码如下:
/*
 * add_to_page_cache_locked - add an already locked page to the page cache
 * of 'mapping' at 'index'.
 *
 * Takes an extra reference on the page, then, under pagecache_lock, records
 * the index and links the page into the mapping's page list, the page hash
 * and the LRU. The helper definitions it relies on follow in this excerpt.
 */
void add_to_page_cache_locked(struct page * page, struct address_space *mapping, unsigned long index)
{
if (!PageLocked(page))
BUG();
page_cache_get(page);// extra reference; per the article the use count is now 2
spin_lock(&pagecache_lock);
page->index = index;// for a swap-cache page the index holds the swap entry value
add_page_to_inode_queue(mapping, page);// links page->list into mapping->clean_pages
add_page_to_hash_queue(page, page_hash(mapping, index));// links page->next_hash / page->pprev_hash into the global hash table
lru_cache_add(page);// links page->lru into the global active_list
spin_unlock(&pagecache_lock);
}static inline void add_page_to_inode_queue(struct address_space *mapping, struct page * page)
{
/* Put the page on the mapping's clean list and point it back at the mapping. */
struct list_head *head = &mapping->clean_pages;
mapping->nrpages++;
list_add(&page->list, head);// page->list goes onto mapping->clean_pages
page->mapping = mapping;// for swap-cache pages 'mapping' is &swapper_space
}struct address_space swapper_space = {
/* address_space instance used for swap-cache pages */
LIST_HEAD_INIT(swapper_space.clean_pages),
LIST_HEAD_INIT(swapper_space.dirty_pages),
LIST_HEAD_INIT(swapper_space.locked_pages),
0, /* nrpages */
&swap_aops,
};static void add_page_to_hash_queue(struct page * page, struct page **p)
{
/* Insert the page at the head of the hash chain whose head pointer is *p. */
struct page *next = *p;// old chain head; the page is linked in via next_hash / pprev_hash
*p = page;
page->next_hash = next;
page->pprev_hash = p;
if (next)
next->pprev_hash = &page->next_hash;
if (page->buffers)
PAGE_BUG(page);
atomic_inc(&page_cache_size);
}void lru_cache_add(struct page * page)
{
/*
* Put a locked page on the active list under pagemap_lru_lock; a page
* whose age is 0 is deactivated again right away.
*/
spin_lock(&pagemap_lru_lock);
if (!PageLocked(page))
BUG();
DEBUG_ADD_PAGE
add_page_to_active_list(page);
/* This should be relatively rare */
if (!page->age)
deactivate_page_nolock(page);
spin_unlock(&pagemap_lru_lock);
}#define add_page_to_active_list(page) { DEBUG_ADD_PAGE ZERO_PAGE_BUG SetPageActive(page); list_add(&(page)->lru, &active_list); /* page->lru goes onto the global active_list */ \
nr_active_pages++; /* bump the global nr_active_pages counter */ \
}#define read_swap_cache(entry) read_swap_cache_async(entry, 1);还是调用read_swap_cache_async函数,只是本次执行,很可能从lookup_swap_cache函数,找到了page。
/*
 * read_swap_cache_async - same function as quoted earlier in the article,
 * repeated here with notes for the call made through read_swap_cache()
 * (wait == 1), where the page is likely to be in the swap cache already
 * because of the preceding read-ahead.
 *
 * Returns the cached page, a newly read page, or 0 on failure.
 */
struct page * read_swap_cache_async(swp_entry_t entry, int wait)
{
struct page *found_page = 0, *new_page;
unsigned long new_page_addr;
/*
* Make sure the swap entry is still in use.
*/
if (!swap_duplicate(entry)) /* Account for the swap cache */
goto out;
/*
* Look for the page in the swap cache.
*/
found_page = lookup_swap_cache(entry);// assume the page is found in the hash table; per the article a process has now claimed it and its use count is 2
if (found_page)
goto out_free_swap;
new_page_addr = __get_free_page(GFP_USER);
if (!new_page_addr)
goto out_free_swap; /* Out of memory */
new_page = virt_to_page(new_page_addr);
/*
* Check the swap cache again, in case we stalled above.
*/
found_page = lookup_swap_cache(entry);// __get_free_page() may have had to wait for free pages while another process ran; after resuming, search the hash table once more
if (found_page)
goto out_free_page;
/*
* Add it to the swap cache and read its contents.
*/
lock_page(new_page);
add_to_swap_cache(new_page, entry);
rw_swap_page(READ, new_page, wait);
return new_page;
out_free_page:
page_cache_release(new_page);
out_free_swap:
swap_free(entry);
out:
return found_page;
} 三、lookup_swap_cache函数,如下:
/*
 * lookup_swap_cache - find the swap-cache page for 'entry'.
 *
 * Returns the page, unlocked but still holding the reference taken by
 * find_lock_page(), or 0 when no page is cached for the entry.
 *
 * NOTE(review): the article cuts this function short; the out_bad label and
 * the end of the while loop are not shown.
 */
struct page * lookup_swap_cache(swp_entry_t entry)
{
struct page *found;
#ifdef SWAP_CACHE_INFO
swap_cache_find_total++;
#endif
while (1) {
/*
* Right now the pagecache is 32-bit only. But it's a 32 bit index. =)
*/
repeat:
found = find_lock_page(&swapper_space, entry.val);// entry.val is the swap entry value used as the cache index
if (!found)
return 0;
/*
* Though the "found" page was in the swap cache an instant
* earlier, it might have been removed by refill_inactive etc.
* Re search ... Since find_lock_page grabs a reference on
* the page, it can not be reused for anything else, namely
* it can not be associated with another swaphandle, so it
* is enough to check whether the page is still in the scache.
*/
if (!PageSwapCache(found)) {
UnlockPage(found);
page_cache_release(found);
goto repeat;
}
if (found->mapping != &swapper_space)
goto out_bad;
#ifdef SWAP_CACHE_INFO
swap_cache_find_success++;
#endif
UnlockPage(found);
return found;
}#define find_lock_page(mapping, index) __find_lock_page(mapping, index, page_hash(mapping, index))
/*
 * __find_lock_page - look up (mapping, offset) in the hash chain at *hash
 * and return the page locked with an extra reference, or NULL if it is not
 * there.
 *
 * The page lock is taken after pagecache_lock has been dropped, so the page
 * may have been removed in between; in that case the lock and the reference
 * are released and the lookup is retried.
 */
struct page * __find_lock_page (struct address_space *mapping,
unsigned long offset, struct page **hash)
{
struct page *page;
/*
* We scan the hash list read-only. Addition to and removal from
* the hash-list needs a held write-lock.
*/
repeat:
spin_lock(&pagecache_lock);
page = __find_page_nolock(mapping, offset, *hash);// *hash is the head of one hash chain; search that chain
if (page) {
page_cache_get(page);// take a reference
spin_unlock(&pagecache_lock);
lock_page(page);
/* Is the page still hashed? Ok, good.. */
if (page->mapping)
return page;
/* Nope: we raced. Release and try again.. */
UnlockPage(page);
page_cache_release(page);
goto repeat;
}
spin_unlock(&pagecache_lock);
return NULL;
}static inline struct page * __find_page_nolock(struct address_space *mapping, unsigned long offset, struct page *page)
{
/*
* __find_page_nolock - walk a hash chain starting at 'page' and return
* the first page whose mapping and index match, or NULL at the end of
* the chain. A hit is aged up and may wake kswapd.
*
* The initial goto enters the loop at the NULL check, so the chain head
* passed in is tested before the walk advances to next_hash.
*/
goto inside;
for (;;) {
page = page->next_hash;// advance along the hash chain
inside:
if (!page)
goto not_found;
if (page->mapping != mapping)
continue;
if (page->index == offset)
break;
}
/*
* Touching the page may move it to the active list.
* If we end up with too few inactive pages, we wake
* up kswapd.
*/
age_page_up(page);
if (inactive_shortage() > inactive_target / 2 && free_shortage())
wakeup_kswapd(0);
not_found:
return page;
}根据页面交换项,在hash表中寻找page结构。
swapin_readahead(entry);//预读页面
page = read_swap_cache(entry);//真正得到一个页面,这个页面可能从hash表中寻找到,因为上面预读了。或者自己申请页面,并且从盘上将其内容读进来。
read_swap_cache无论是从hash表中找到页面,还是自己申请页面并加入到对应的链表,最后页面的使用计数都是2。而swapin_readahead预读进来的页面,如果没有被进程认领,使用计数为1。
标签:
原文地址:http://blog.csdn.net/jltxgcy/article/details/44002859