标签:
在Linux内核源代码情景分析-共享内存中,共享内存,当内存紧张时是换出到交换分区。
在Linux内核源代码情景分析-mmap后,文件与虚拟区间建立映射中,文件映射的页面,当内存紧张时是换出到硬盘上的文件中。
这里的交换分区,就是是swap分区,记得给电脑安装ubuntu时,就有一项是swap分区。
交换分区和文件的区别是:
文件是在一个具体的文件系统之下的,交换分区没有这个必要,它可能是一个裸分区,不需要文件系统。
需要说明一点,并不是所有从物理内存中交换出来的数据都会被放到Swap中(如果这样的话,Swap就会不堪重负),有相当一部分数据被直接交换到文件系统。例如,有的程序会打开一些文件,对文件进行读写(其实每个程序都至少要打开一个文件,那就是运行程序本身),当需要将这些程序的内存空间交换出去时,就没有必要将文件部分的数据放到Swap空间中了,而可以直接将其放到文件里去。但是那些用malloc和new函数生成的对象的数据则不同,它们需要Swap空间,因为它们在文件系统中没有相应的“储备”文件,因此被称作“匿名”(Anonymous)内存数据。这类数据还包括堆栈中的一些状态和变量数据等。所以说,Swap空间是“匿名”数据的交换空间。
总结:
当内存紧张时:
共享内存区,malloc和new函数生成的对象的数据,堆栈中的一些状态和变量数据都被换出到swap分区。
文件映射的页面,换出到硬盘上的文件中。
现在来分析最后一种情况,页面中存放的是堆栈中的一些状态和变量数据。
一、堆栈页面的换入
由于pte_none(entry)为true,所以调用do_no_page()。
static inline int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct * vma, unsigned long address,
int write_access, pte_t * pte)
{
pte_t entry;
/*
* We need the page table lock to synchronize with kswapd
* and the SMP-safe atomic PTE updates.
*/
spin_lock(&mm->page_table_lock);
entry = *pte;
if (!pte_present(entry)) {
/*
* If it truly wasn‘t present, we know that kswapd
* and the PTE updates will not touch it later. So
* drop the lock.
*/
spin_unlock(&mm->page_table_lock);
if (pte_none(entry))
return do_no_page(mm, vma, address, write_access, pte);
return do_swap_page(mm, vma, address, pte, pte_to_swp_entry(entry), write_access);
}
if (write_access) {
if (!pte_write(entry))
return do_wp_page(mm, vma, address, pte, entry);
entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
establish_pte(vma, address, pte, entry);
spin_unlock(&mm->page_table_lock);
return 1;
}static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
unsigned long address, int write_access, pte_t *page_table)
{
struct page * new_page;
pte_t entry;
if (!vma->vm_ops || !vma->vm_ops->nopage)//由于nopage为NULL
return do_anonymous_page(mm, vma, page_table, write_access, address);
/*
* The third argument is "no_share", which tells the low-level code
* to copy, not share the page even if sharing is possible. It‘s
* essentially an early COW detection.
*/
new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
if (new_page == NULL) /* no page was available -- SIGBUS */
return 0;
if (new_page == NOPAGE_OOM)
return -1;
++mm->rss;
/*
* This silly early PAGE_DIRTY setting removes a race
* due to the bad i386 page protection. But it‘s valid
* for other architectures too.
*
* Note that if write_access is true, we either now have
* an exclusive copy of the page, or this is a shared mapping,
* so we can make it writable and dirty to avoid having to
* handle that later.
*/
flush_page_to_ram(new_page);
flush_icache_page(vma, new_page);
entry = mk_pte(new_page, vma->vm_page_prot);
if (write_access) {
entry = pte_mkwrite(pte_mkdirty(entry));
} else if (page_count(new_page) > 1 &&
!(vma->vm_flags & VM_SHARED))
entry = pte_wrprotect(entry);
set_pte(page_table, entry);
/* no need to invalidate: a not-present page shouldn‘t be cached */
update_mmu_cache(vma, address, entry);
return 2; /* Major fault */
} 由于vma->vm_ops->nopage为NULL,所以执行do_anonymous_page,代码如下:static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
{
struct page *page = NULL;
pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
if (write_access) {
page = alloc_page(GFP_HIGHUSER);//分配页面
if (!page)
return -1;
clear_user_highpage(page, addr);
entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
mm->rss++;
flush_page_to_ram(page);
}
set_pte(page_table, entry);//并建立映射
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, entry);
return 1; /* Minor fault */
}换入后page并未加入到任何队列。
refill_inactive_scan和swap_out,把活跃的页面变成不活跃脏的页面。挑选的原则是最近没有被访问,且age小于0。这里的活跃页面是先加入到对应的队列中,再脱离。详见swap_out调用try_to_swap_out的代码分析。
page_launder,把不活跃脏的页面变成不活跃干净的页面。
try_to_swap_out,代码如下:
static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
{
pte_t pte;
swp_entry_t entry;
struct page * page;
int onlist;
pte = *page_table;
if (!pte_present(pte))//物理页面是否在内存中
goto out_failed;
page = pte_page(pte);
if ((!VALID_PAGE(page)) || PageReserved(page))
goto out_failed;
if (!mm->swap_cnt)
return 1;
mm->swap_cnt--;//被考察的页面数减1
onlist = PageActive(page);
/* Don‘t look at this pte if it‘s been accessed recently. */
if (ptep_test_and_clear_young(page_table)) {//如果页面被访问过,那么直接out_failed
age_page_up(page);
goto out_failed;
}
if (!onlist)
/* The page is still mapped, so it can‘t be freeable... */
age_page_down_ageonly(page);
......
if (page->age > 0)//如果页面的age不小于0,页out_failed
goto out_failed;
if (TryLockPage(page))
goto out_failed;
......
pte = ptep_get_and_clear(page_table);//走到这里,说明页面最近没有访问过,且age小于0,清空页目录项
flush_tlb_page(vma, address);
......
if (PageSwapCache(page)) {//page结构不在swapper_space队列中
entry.val = page->index;
if (pte_dirty(pte))
set_page_dirty(page);
set_swap_pte:
swap_duplicate(entry);
set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
UnlockPage(page);
mm->rss--;
deactivate_page(page);
page_cache_release(page);
out_failed:
return 0;
}
flush_cache_page(vma, address);
if (!pte_dirty(pte))//不执行这里
goto drop_pte;
/*
* Ok, it‘s really dirty. That means that
* we should either create a new swap cache
* entry for it, or we should write it back
* to its own backing store.
*/
if (page->mapping) {//也不执行这里
set_page_dirty(page);
goto drop_pte;
}
/*
* This is a dirty, swappable page. First of all,
* get a suitable swap entry for it, and make sure
* we have the swap cache set up to associate the
* page with that swap entry.
*/
entry = get_swap_page();//执行这里,分配一个交换分区
if (!entry.val)
goto out_unlock_restore; /* No swap space left */
/* Add it to the swap cache and mark it dirty */
add_to_swap_cache(page, entry);//把page加入到对应的队列
set_page_dirty(page);
goto set_swap_pte;//跳转到set_swap_pte
out_unlock_restore:
set_pte(page_table, pte);
UnlockPage(page);
} add_to_swap_cache,代码如下:void add_to_swap_cache(struct page *page, swp_entry_t entry)
{
unsigned long flags;
#ifdef SWAP_CACHE_INFO
swap_cache_add_total++;
#endif
if (!PageLocked(page))
BUG();
if (PageTestandSetSwapCache(page))
BUG();
if (page->mapping)
BUG();
flags = page->flags & ~((1 << PG_error) | (1 << PG_arch_1));
page->flags = flags | (1 << PG_uptodate);
add_to_page_cache_locked(page, &swapper_space, entry.val);
}void add_to_page_cache_locked(struct page * page, struct address_space *mapping, unsigned long index)
{
if (!PageLocked(page))
BUG();
page_cache_get(page);
spin_lock(&pagecache_lock);
page->index = index;
add_page_to_inode_queue(mapping, page);
add_page_to_hash_queue(page, page_hash(mapping, index));
lru_cache_add(page);
spin_unlock(&pagecache_lock);
}此时:
page->list链入mapping->clean_pages;
page->next_hash和page->pprev_hash链入全局的Hash表;
page->lru链入了全局的active_list;
然后返回到try_to_swap_out,继续执行go set_swap_pte,继续执行如下:
set_swap_pte: swap_duplicate(entry); set_pte(page_table, swp_entry_to_pte(entry));//页目录项指向盘上数据块的地址 drop_pte: UnlockPage(page); mm->rss--; deactivate_page(page);//从active_list到inactive_list page_cache_release(page);此时:
page->list链入mapping->dirty_pages/clean_pages;
page->next_hash和page->pprev_hash链入全局的Hash表;
page->lru链入了全局的inactive_dirty_list;
三、堆栈页面恢复映射
1、对于不活跃脏的页面和不活跃干净的页面, 如果发生缺页中断,会调用do_swap_page()。
static inline int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct * vma, unsigned long address,
int write_access, pte_t * pte)
{
pte_t entry;
/*
* We need the page table lock to synchronize with kswapd
* and the SMP-safe atomic PTE updates.
*/
spin_lock(&mm->page_table_lock);
entry = *pte;
if (!pte_present(entry)) {
/*
* If it truly wasn‘t present, we know that kswapd
* and the PTE updates will not touch it later. So
* drop the lock.
*/
spin_unlock(&mm->page_table_lock);
if (pte_none(entry))//有值
return do_no_page(mm, vma, address, write_access, pte);
return do_swap_page(mm, vma, address, pte, pte_to_swp_entry(entry), write_access);//有值,所以会调用这个
}
if (write_access) {
if (!pte_write(entry))
return do_wp_page(mm, vma, address, pte, entry);
entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
establish_pte(vma, address, pte, entry);
spin_unlock(&mm->page_table_lock);
return 1;
}因为页目录表有值,指向了交换分区,所以开始执行do_swap_page()。
static int do_swap_page(struct mm_struct * mm,
struct vm_area_struct * vma, unsigned long address,
pte_t * page_table, swp_entry_t entry, int write_access)
{
struct page *page = lookup_swap_cache(entry);
pte_t pte;
if (!page) {
lock_kernel();
swapin_readahead(entry);
page = read_swap_cache(entry);
unlock_kernel();
if (!page)
return -1;
flush_page_to_ram(page);
flush_icache_page(vma, page);
}
mm->rss++;
pte = mk_pte(page, vma->vm_page_prot);
/*
* Freeze the "shared"ness of the page, ie page_count + swap_count.
* Must lock page before transferring our swap count to already
* obtained page count.
*/
lock_page(page);
swap_free(entry);
if (write_access && !is_page_shared(page))
pte = pte_mkwrite(pte_mkdirty(pte));
UnlockPage(page);
set_pte(page_table, pte);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte);
return 1; /* Minor fault */
} 在do_swap_page中,lookup_swap_cache,会在全局的Hash表找到对应的页面,并且引用计数加1,变成2,但还没有移到活跃队列中。什么时候转移到活跃队列中呢?答案在,page_launder和reclaim_page中。
page_launder:
if (PageTestandClearReferenced(page) || page->age > 0 || //此时引用计数大于1
(!page->buffers && page_count(page) > 1) ||
page_ramdisk(page)) {
del_page_from_inactive_dirty_list(page);
add_page_to_active_list(page);
continue;
} reclaim_page:if (PageTestandClearReferenced(page) || page->age > 0 ||
(!page->buffers && page_count(page) > 1)) {//此时引用计数大于1
del_page_from_inactive_clean_list(page);
add_page_to_active_list(page);
continue;
} 由于lookup_swap_cache返回NULL,所以接下来的流程就是我们原来分析过的一篇文章,Linux内核源代码情景分析-内存管理之用户页面的换入。
标签:
原文地址:http://blog.csdn.net/jltxgcy/article/details/45198861