标签:
一、文件映射的页面换入
在mmap之后(mmap参考《Linux内核源代码情景分析-系统调用mmap()》),当这个区间的一个页面首次受到访问时,会由于页面无映射而发生缺页异常,相应的异常处理程序为do_no_page()。
/*
 * handle_pte_fault - dispatch a fault on a single page-table entry.
 *
 * Reads *pte under mm->page_table_lock.
 *  - Entry not present: the lock is dropped and the fault is handed to
 *    do_no_page() when the entry is pte_none(), otherwise to
 *    do_swap_page().
 *  - Entry present, write access to a non-writable entry: handed to
 *    do_wp_page().
 *  - Otherwise the entry is marked young (and dirty on a write),
 *    re-established with establish_pte(), the lock is released and 1 is
 *    returned.
 */
static inline int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct * vma, unsigned long address,
int write_access, pte_t * pte)
{
pte_t entry;
/*
 * We need the page table lock to synchronize with kswapd
 * and the SMP-safe atomic PTE updates.
 */
spin_lock(&mm->page_table_lock);
entry = *pte;
if (!pte_present(entry)) {
/*
 * If it truly wasn't present, we know that kswapd
 * and the PTE updates will not touch it later. So
 * drop the lock.
 */
spin_unlock(&mm->page_table_lock);
if (pte_none(entry))
return do_no_page(mm, vma, address, write_access, pte);
return do_swap_page(mm, vma, address, pte, pte_to_swp_entry(entry), write_access);
}
if (write_access) {
/*
 * NOTE(review): page_table_lock is still held on this return path;
 * presumably do_wp_page() releases it -- confirm against its source.
 */
if (!pte_write(entry))
return do_wp_page(mm, vma, address, pte, entry);
entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
establish_pte(vma, address, pte, entry);
spin_unlock(&mm->page_table_lock);
return 1;
} 由于pte_none(entry)为true,所以执行do_no_page,代码如下:static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
unsigned long address, int write_access, pte_t *page_table)
{
/*
 * do_no_page - handle a fault on an entry that has no mapping at all.
 *
 * Falls back to do_anonymous_page() when the vma has no vm_ops->nopage
 * handler. Otherwise it asks nopage() for the page, increments mm->rss,
 * builds a pte from vma->vm_page_prot and installs it with set_pte().
 *
 * Returns 0 when nopage() yields NULL, -1 when it yields NOPAGE_OOM,
 * 2 once the pte has been installed, or do_anonymous_page()'s result.
 */
struct page * new_page;
pte_t entry;
if (!vma->vm_ops || !vma->vm_ops->nopage)
return do_anonymous_page(mm, vma, page_table, write_access, address);
/*
 * The third argument is "no_share", which tells the low-level code
 * to copy, not share the page even if sharing is possible. It's
 * essentially an early COW detection.
 */
new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);// per the article, nopage points to filemap_nopage for this mapping
if (new_page == NULL) /* no page was available -- SIGBUS */
return 0;
if (new_page == NOPAGE_OOM)
return -1;
++mm->rss;
/*
 * This silly early PAGE_DIRTY setting removes a race
 * due to the bad i386 page protection. But it's valid
 * for other architectures too.
 *
 * Note that if write_access is true, we either now have
 * an exclusive copy of the page, or this is a shared mapping,
 * so we can make it writable and dirty to avoid having to
 * handle that later.
 */
flush_page_to_ram(new_page);
flush_icache_page(vma, new_page);
entry = mk_pte(new_page, vma->vm_page_prot);
if (write_access) {
entry = pte_mkwrite(pte_mkdirty(entry));
} else if (page_count(new_page) > 1 &&
!(vma->vm_flags & VM_SHARED))
entry = pte_wrprotect(entry);
set_pte(page_table, entry);// install the mapping
/* no need to invalidate: a not-present page shouldn't be cached */
update_mmu_cache(vma, address, entry);
return 2; /* Major fault */
} filemap_nopage,分配一个空闲内存页面并从文件读入相应的页面。struct page * filemap_nopage(struct vm_area_struct * area,
unsigned long address, int no_share)
{
/*
 * filemap_nopage - supply the page backing a faulting file-mapped address.
 *
 * Computes the page index (pgoff) of the address within the file and looks
 * it up in the page cache. If the page is missing, a read is started
 * (read_cluster_nonblocking() or page_cache_read()) and the lookup is
 * retried. If the page is cached but not up to date, it is locked and
 * a_ops->readpage() is called, with one further retry on failure.
 *
 * On success returns the cached page, or, when no_share is set, a freshly
 * allocated copy of it (NOPAGE_OOM if that allocation fails). Returns NULL
 * when pgoff lies beyond the file size for the current mm or when reading
 * fails, and NOPAGE_OOM when the read path reports -ENOMEM.
 */
int error;
struct file *file = area->vm_file;
struct inode *inode = file->f_dentry->d_inode;
struct address_space *mapping = inode->i_mapping;// mapping is taken from inode->i_mapping
struct page *page, **hash, *old_page;
unsigned long size, pgoff;
pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
retry_all:
/*
 * An external ptracer can access pages that normally aren't
 * accessible..
 */
size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if ((pgoff >= size) && (area->vm_mm == current->mm))
return NULL;
/*
 * Do we have something in the page cache already?
 */
hash = page_hash(mapping, pgoff);
retry_find:
page = __find_get_page(mapping, pgoff, hash);// on the first fault nothing is found yet (per the article)
if (!page)
goto no_cached_page;// go to no_cached_page
/*
 * Ok, found a page in the page cache, now we need to check
 * that it's up-to-date.
 */
if (!Page_Uptodate(page))// once the page has been allocated, go to page_not_uptodate
goto page_not_uptodate;
success:
/*
 * Try read-ahead for sequential areas.
 */
if (VM_SequentialReadHint(area))
nopage_sequential_readahead(area, pgoff, size);
/*
 * Found the page and have a reference on it, need to check sharing
 * and possibly copy it over to another page..
 */
old_page = page;
if (no_share) {
struct page *new_page = page_cache_alloc();
if (new_page) {
copy_user_highpage(new_page, old_page, address);
flush_page_to_ram(new_page);
} else
new_page = NOPAGE_OOM;
page_cache_release(page);
return new_page;
}
flush_page_to_ram(old_page);
return old_page;
no_cached_page:
/*
 * If the requested offset is within our file, try to read a whole
 * cluster of pages at once.
 *
 * Otherwise, we're off the end of a privately mapped file,
 * so we need to map a zero page.
 */
if ((pgoff < size) && !VM_RandomReadHint(area))
error = read_cluster_nonblocking(file, pgoff, size);
else
error = page_cache_read(file, pgoff);// allocate a page structure and add it to the corresponding queues
/*
 * The page we want has now been added to the page cache.
 * In the unlikely event that someone removed it in the
 * meantime, we'll just come back here and read it again.
 */
if (error >= 0)
goto retry_find;// go back to retry_find
/*
 * An error return from page_cache_read can result if the
 * system is low on memory, or a problem occurs while trying
 * to schedule I/O.
 */
if (error == -ENOMEM)
return NOPAGE_OOM;
return NULL;
page_not_uptodate:
lock_page(page);
/* Did it get unhashed while we waited for it? */
if (!page->mapping) {
UnlockPage(page);
page_cache_release(page);
goto retry_all;
}
/* Did somebody else get it up-to-date? */
if (Page_Uptodate(page)) {
UnlockPage(page);
goto success;
}
if (!mapping->a_ops->readpage(file, page)) {
wait_on_page(page);
if (Page_Uptodate(page))
goto success;
}
/*
 * Umm, take care of errors if the page isn't up-to-date.
 * Try to re-read it _once_. We do this synchronously,
 * because there really aren't any performance issues here
 * and we need to check for errors.
 */
lock_page(page);
/* Somebody truncated the page on us? */
if (!page->mapping) {
UnlockPage(page);
page_cache_release(page);
goto retry_all;
}
/* Somebody else successfully read it in? */
if (Page_Uptodate(page)) {
UnlockPage(page);
goto success;
}
ClearPageError(page);
if (!mapping->a_ops->readpage(file, page)) {// per the article this points to ext2_readpage(), which reads the file data from disk into the page
wait_on_page(page);
if (Page_Uptodate(page))
goto success;
}
/*
 * Things didn't work out. Return zero to tell the
 * mm layer so, possibly freeing the page cache page first.
 */
page_cache_release(page);
return NULL;
} page_cache_read,分配page结构,并加入到对应的队列中,代码如下:static inline int page_cache_read(struct file * file, unsigned long offset)
{
/*
 * page_cache_read - make sure a page for (file, offset) is in the page
 * cache and start reading it.
 *
 * Returns 0 when the page is already cached (looked up under
 * pagecache_lock) or when add_to_page_cache_unique() reports that it
 * could not add ours, -ENOMEM when no page could be allocated, and
 * otherwise the result of a_ops->readpage() on the newly added page.
 * The local reference is dropped right after readpage() is called.
 */
struct inode *inode = file->f_dentry->d_inode;
struct address_space *mapping = inode->i_mapping;
struct page **hash = page_hash(mapping, offset);
struct page *page;
spin_lock(&pagecache_lock);
page = __find_page_nolock(mapping, offset, *hash);
spin_unlock(&pagecache_lock);
if (page)
return 0;
page = page_cache_alloc();// allocate a page structure
if (!page)
return -ENOMEM;
if (!add_to_page_cache_unique(page, mapping, offset, hash)) {// add it to the corresponding queues
int error = mapping->a_ops->readpage(file, page);
page_cache_release(page);
return error;
}
/*
 * We arrive here in the unlikely event that someone
 * raced with us and added our page to the cache first.
 */
page_cache_free(page);
return 0;
}此时:
page->list链入mapping->clean_pages;
page->next_hash和page->pprev_hash链入全局的Hash表;
page->lru链入了全局的active_list;
然后返回到retry_find,这次__find_get_page已经找到了,继续执行goto page_not_uptodate。mapping->a_ops->readpage,指向ext2_readpage(),把文件从硬盘读入到内存页面中。
refill_inactive_scan和swap_out,把活跃的页面变成不活跃脏的页面。挑选的原则是最近没有被访问,且age已经减为0(代码中的判断是:page->age > 0时放弃换出)。
page_launder,把不活跃脏的页面变成不活跃干净的页面。
不活跃脏的页面,有如下特点:
使用计数为1;
page->list链入mapping->dirty_pages/clean_pages;
page->next_hash和page->pprev_hash链入全局的Hash表;
page->lru链入了全局的inactive_dirty_list;
page->flags中对应的标志位设置为PG_dirty。
不活跃干净的页面,有如下特点:
使用计数为1;
page->list链入mapping->dirty_pages/clean_pages(保持原样);
page->next_hash和page->pprev_hash链入全局的Hash表;
page->lru链入了page->zone->inactive_clean_list;
swap_out,会调用try_to_swap_out,代码如下:
/*
 * try_to_swap_out - try to detach one mapped page from a process.
 *
 * Abridged excerpt: lines consisting of "......" mark code that the
 * article omits.
 *
 * The pte is left alone (return 0) when it is not present, when the page
 * is invalid or reserved, when the pte was recently accessed, when
 * page->age is still above zero, or when the page cannot be locked.
 * Otherwise the pte is cleared and then either
 *   - pointed at the page's swap entry (set_swap_pte), or
 *   - simply dropped (drop_pte) when the pte is clean or the page has a
 *     mapping, or
 *   - restored (out_unlock_restore) when no swap entry could be obtained.
 *
 * Returns 1 only when mm->swap_cnt has reached zero; every other path
 * returns 0.
 */
static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
{
	pte_t pte;
	swp_entry_t entry;
	struct page * page;
	int onlist;

	pte = *page_table;
	if (!pte_present(pte))	/* is the physical page in memory at all? */
		goto out_failed;
	page = pte_page(pte);
	if ((!VALID_PAGE(page)) || PageReserved(page))
		goto out_failed;

	if (!mm->swap_cnt)
		return 1;
	mm->swap_cnt--;		/* one fewer page left to examine for this mm */

	onlist = PageActive(page);
	/* Don't look at this pte if it's been accessed recently. */
	if (ptep_test_and_clear_young(page_table)) {
		/* accessed since the last scan: age it up and give up */
		age_page_up(page);
		goto out_failed;
	}
	if (!onlist)
		/* The page is still mapped, so it can't be freeable... */
		age_page_down_ageonly(page);
......
	if (page->age > 0)	/* age has not dropped to zero yet: give up */
		goto out_failed;
	if (TryLockPage(page))
		goto out_failed;
......
	/*
	 * Getting here means the pte was not recently accessed and
	 * page->age is no longer above zero: clear the page-table entry.
	 */
	pte = ptep_get_and_clear(page_table);
	flush_tlb_page(vma, address);
......
	/*
	 * Article's note: a file-mapped page is not in the swapper_space
	 * queues, so for such a page this branch is not taken and control
	 * continues below the block (the labels inside are still reached
	 * via goto).
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		if (pte_dirty(pte))
			set_page_dirty(page);
set_swap_pte:
		swap_duplicate(entry);
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		UnlockPage(page);
		mm->rss--;
		deactivate_page(page);
		page_cache_release(page);
out_failed:
		return 0;
	}
	flush_cache_page(vma, address);
	if (!pte_dirty(pte))	/* clean pte: just drop it */
		goto drop_pte;
	/*
	 * Ok, it's really dirty. That means that
	 * we should either create a new swap cache
	 * entry for it, or we should write it back
	 * to its own backing store.
	 */
	if (page->mapping) {	/* page has a mapping: mark dirty, drop the pte */
		set_page_dirty(page);
		goto drop_pte;
	}
	/*
	 * This is a dirty, swappable page. First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	entry = get_swap_page();
	if (!entry.val)
		goto out_unlock_restore; /* No swap space left */
	/* Add it to the swap cache and mark it dirty */
	add_to_swap_cache(page, entry);
	set_page_dirty(page);
	goto set_swap_pte;
out_unlock_restore:
	/* Put the original pte back; nothing was swapped out. */
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}
对于用于文件映射的页面与普通的换入/换出页面有不同的处理。对于前者是解除页面映射,把页面表项设置成0;而对后者是断开页面映射,使页面表项指向盘上页面。
三、文件映射的恢复映射
1、对于不活跃脏的页面和不活跃干净的页面, 如果发生缺页中断,因页面无映射而进入do_no_page(),而不像换入/换出页面那样进入do_swap_page()。
do_no_page中调用filemap_nopage,再次访问到这个页面,那么会调用__find_get_page,会在全局的Hash表找到对应的页面,并且引用计数加1,变成2,但还没有移到活跃队列中。什么时候转移到活跃队列中呢?
答案在page_launder和reclaim_page中。
page_launder:
if (PageTestandClearReferenced(page) || page->age > 0 || //此时引用计数大于1
(!page->buffers && page_count(page) > 1) ||
page_ramdisk(page)) {
del_page_from_inactive_dirty_list(page);
add_page_to_active_list(page);
continue;
}
reclaim_page:
if (PageTestandClearReferenced(page) || page->age > 0 ||
(!page->buffers && page_count(page) > 1)) {//此时引用计数大于1
del_page_from_inactive_clean_list(page);
add_page_to_active_list(page);
continue;
}
2、如果页面已经被回收(已不在全局Hash表中),此时发生缺页中断,进入do_no_page,再调用filemap_nopage;再次访问这个页面时,__find_get_page返回NULL,于是重新执行page_cache_read,分配一个空闲内存页面并从文件读入相应的页面。
此时:
page->list链入mapping->clean_pages;
page->next_hash和page->pprev_hash链入全局的Hash表;
page->lru链入了全局的active_list;
Linux内核源代码情景分析-mmap后,文件与虚拟区间建立映射
标签:
原文地址:http://blog.csdn.net/jltxgcy/article/details/45197997