标签:flush 位置 mon creation pac any lis additions support
Linux 虚拟内存管理功能
/*
* This handles the memory map.. We could make this a config
* option, but too many people screw it up, and too few need
* it.
*
* A __PAGE_OFFSET of 0xC0000000 means that the kernel has
* a virtual address space of one gigabyte, which limits the
* amount of physical memory you can use to about 950MB.
*
* If you want more physical memory than this then see the CONFIG_HIGHMEM4G
* and CONFIG_HIGHMEM64G options in the kernel configuration.
*/
/* Virtual address at which the kernel's mapping of physical memory begins. */
#define __PAGE_OFFSET (0xC0000000)
/* ... (lines elided in this excerpt) ... */
/* __PAGE_OFFSET as an unsigned long, for use in address arithmetic. */
#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
/* Kernel virtual address -> physical address: subtract PAGE_OFFSET. */
#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
/* Physical address -> kernel virtual address (as void *): add PAGE_OFFSET. */
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
1.页表的初步初始化
/*
* The page tables are initialized to only 8MB here - the final page
* tables are set up later depending on memory size.
*/
/* Boot-time page tables pg0 and pg1, each placed on its own 0x1000 boundary. */
.org 0x2000
ENTRY(pg0) /* the symbol pg0 holds a virtual (link-time) address */
.org 0x3000
ENTRY(pg1)
/*
* empty_zero_page must immediately follow the page tables ! ( The
* initialization loop counts until empty_zero_page)
*/
.org 0x4000
ENTRY(empty_zero_page)
/*
* Initialize page tables
*/
movl $pg0-__PAGE_OFFSET,%edi /* initialize page tables: put the physical address of pg0 in %edi (the original note says 0x1002000; presumably 0x00102000 is meant - TODO confirm) */
movl $007,%eax /* "007" doesn't mean with right to kill, but
PRESENT+RW+USER */
/* Store one entry per iteration, then advance the entry's frame address by 0x1000. */
2: stosl
add $0x1000,%eax
/* Stop once %edi reaches the physical address of empty_zero_page. */
cmp $empty_zero_page-__PAGE_OFFSET,%edi
jne 2b
/*
 * This is initialized to create an identity-mapping at 0-8M ( for bootup
 * purposes) and another mapping of the 0-8M area at virtual address
 * PAGE_OFFSET.
 */
.org 0x1000
ENTRY(swapper_pg_dir)
	/*
	 * Two page-table entries for the identity mapping. Per the original
	 * note, both are user page tables, writable and present in memory
	 * (flag bits 007).
	 */
	.long 0x00102007
	.long 0x00103007
	.fill BOOT_USER_PGD_PTRS-2,4,0
	/* default: 766 entries */
	/* The same two page tables again, for the mapping at PAGE_OFFSET. */
	.long 0x00102007
	.long 0x00103007
	/* default: 254 entries */
	.fill BOOT_KERNEL_PGD_PTRS-2,4,0
/*
* Enable paging
*/
3:
/* Load the physical address of swapper_pg_dir into %cr3. */
movl $swapper_pg_dir-__PAGE_OFFSET,%eax
movl %eax,%cr3 /* set the page table pointer.. */
movl %cr0,%eax
orl $0x80000000,%eax
movl %eax,%cr0 /* ..and set paging (PG) bit */
jmp 1f /* flush the prefetch-queue */
1:
/* Jump through a register to the absolute address of the next "1:" label. */
/* NOTE(review): that target label is not included in this excerpt. */
movl $1f,%eax
jmp *%eax /* make sure eip is relocated */
struct page{
struct list_head list; //通过使用它进入下面的数据结构free_area_struct结构中的双向链队列
struct address_space * mapping; //用于内存交换的数据结构
unsigned long index;//当页面进入交换文件后,指向其去向
struct page *next_hash; //自身的指针,这样就可以链接成一个链表
atomic t count; //用于页面交换的计数,若页面为空闲则为0,分配就赋值1,没建立或恢复一次映射就加1,断开映射就减一
unsigned long flags;//反应页面各种状态,例如活跃,不活跃脏,不活跃干净,空闲
struct list_head lru;
unsigned long age; //表示页面寿命
wait_queue_head_t wait;
struct page ** pprev_hash;
struct buffer_head * buffers;
void * virtual
struct zone_struct * zone; //指向所属的管理区
}
/*
* Per-node memory descriptor (pg_data_t).
* Field notes are translated from the original annotations.
*/
typedef struct pglist_data {
zone_t node_zones[MAX_NR_ZONES]; /* this node's zones (at most 3, per the original note) */
zonelist_t node_zonelists[GFP_ZONEMASK+1]; /* array of zone lists whose entries point at the zones above */
int nr_zones;
struct page *node_mem_map; /* points to this node's array of struct page */
unsigned long *valid_addr_bitmap;
struct bootmem_data *bdata;
unsigned long node_start_paddr;
unsigned long node_start_mapnr;
unsigned long node_size;
int node_id;
struct pglist_data *node_next; /* next node; the nodes form a singly linked list */
} pg_data_t;
typedef struct zonelist_struct {
zone_t *zone[MAX_NR_ZONE+1]; //NULL delimited 管理区
Int gfp_mast;
} zonelist_t
typedef struct zone_struct {
/*
* Commonly accessed fields:
*/
spinlock_t lock; 用于暴走对该结构中其他域的串行访问
unsigned long free_pages;//这个区中现有空闲页的个数
unsigned long pages_min, pages_low, pages_high;//对这个区最少,次少及最多页面个数的描述
int need_balance;//与kswapd合在一起
/*
* free areas of different sizes
*/
free_area_t free_area[MAX_ORDER];/在伙伴分配系统中的位图数组和页面链表
/*
* Discontig memory support fields.
*/
struct pglist_data *zone_pgdat;//本管理区所在的存储节点
struct page *zone_mem_map;//本管理区的内存映射表
unsigned long zone_start_paddr;//本管理区的物理地址
unsigned long zone_start_mapnr;//mem_map索引
/*
* rarely used fields:
*/
char *name;
unsigned long size;
} zone_t;
type struct free_area_struct {
struct list_head free_list
unsigned int *map
} free_area_t
/*
 * _alloc_pages - try every memory node in turn until one satisfies the request.
 * @gfp_mask: selects the allocation policy
 * @order: size of the wanted block; the block holds 2^order pages (1, 2, 4, ...)
 *
 * Returns the page pointer obtained from alloc_pages_pgdat(), or NULL when
 * order >= MAX_ORDER or no node could satisfy the request.
 */
struct page * _alloc_pages(unsigned int gfp_mask, unsigned int order)
{
	struct page *page;
	pg_data_t *first, *node;
#ifndef CONFIG_NUMA
	unsigned long irq_flags;
	static pg_data_t *cursor = NULL;
#endif

	if (order >= MAX_ORDER)
		return NULL;

#ifdef CONFIG_NUMA
	/* Start with the node of the CPU we are currently running on. */
	first = NODE_DATA(numa_node_id());
#else
	/* Rotate the starting node from call to call, under node_lock. */
	spin_lock_irqsave(&node_lock, irq_flags);
	if (cursor == NULL)
		cursor = pgdat_list;
	first = cursor;
	cursor = cursor->node_next;
	spin_unlock_irqrestore(&node_lock, irq_flags);
#endif

	/* First sweep: from the starting node to the end of the node list. */
	for (node = first; node != NULL; node = node->node_next) {
		page = alloc_pages_pgdat(node, gfp_mask, order);
		if (page != NULL)
			return page;
	}

	/* Second sweep: from the head of the list up to the starting node. */
	for (node = pgdat_list; node != first; node = node->node_next) {
		page = alloc_pages_pgdat(node, gfp_mask, order);
		if (page != NULL)
			return page;
	}

	return NULL;
}
#ifndef CONFIG_DISCONTIGMEM
/*
 * alloc_pages - allocate a block of 2^order pages from contig_page_data.
 * @gfp_mask: allocation flags; (gfp_mask & GFP_ZONEMASK) selects which of
 *            contig_page_data.node_zonelists is handed to __alloc_pages()
 * @order: log2 of the number of pages wanted
 *
 * Returns NULL when order >= MAX_ORDER, otherwise whatever __alloc_pages()
 * returns.
 */
static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
{
	/*
	 * Gets optimized away by the compiler.
	 */
	if (order >= MAX_ORDER)
		return NULL;
	return __alloc_pages(gfp_mask, order,
		contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
}
#endif
/*
* __alloc_pages - allocate a block of 2^order pages from the zones in zonelist.
* gfp_mask: allocation flags (__GFP_WAIT is tested below)
* order: log2 of the number of pages wanted
* zonelist: NULL-terminated list of candidate zones; the first is the "classzone"
*
* NOTE(review): this excerpt is truncated - it stops after the
* balance_classzone() attempt and has no closing brace.
*/
struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)
{
unsigned long min;
zone_t **zone, * classzone;
struct page * page;
int freed;
zone = zonelist->zones;
classzone = *zone;
min = 1UL << order;
/* Pass 1: walk the zones, accumulating each zone's pages_low into min. */
for (;;) {
zone_t *z = *(zone++);
if (!z)
break;
min += z->pages_low;
/* If the zone has more free pages than the accumulated threshold (request size plus low watermarks), try rmqueue(). */
if (z->free_pages > min) {
page = rmqueue(z, order);
if (page)
return page; /* success: return the pointer to the first page */
}
}
/* Pass 1 failed: flag the classzone as needing balancing and wake kswapd if it is waiting. */
classzone->need_balance = 1;
mb();
if (waitqueue_active(&kswapd_wait))
wake_up_interruptible(&kswapd_wait);
/* Pass 2: same walk, using pages_min (divided by 4 when __GFP_WAIT is not set). */
zone = zonelist->zones;
min = 1UL << order;
for (;;) {
unsigned long local_min;
zone_t *z = *(zone++);
if (!z)
break;
local_min = z->pages_min;
if (!(gfp_mask & __GFP_WAIT))
local_min >>= 2;
min += local_min;
if (z->free_pages > min) {
page = rmqueue(z, order);
if (page)
return page;
}
}
/* Tasks flagged PF_MEMALLOC or PF_MEMDIE try every zone with no free-page threshold check. */
if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) {
zone = zonelist->zones;
for (;;) {
zone_t *z = *(zone++);
if (!z)
break;
page = rmqueue(z, order);
if (page)
return page;
}
return NULL;
}
/* Atomic allocations - we can't balance anything */
if (!(gfp_mask & __GFP_WAIT))
return NULL;
page = balance_classzone(classzone, gfp_mask, order, &freed);
if (page)
return page;
/*
 * rmqueue - take a block of 2^order pages off a zone's free lists.
 * @zone: zone to allocate from
 * @order: the request is for 2^order pages
 *
 * Searches the free list of the requested order first and moves on to the
 * next larger order whenever a list is empty. Returns the allocated block,
 * or NULL when every list that was searched is empty.
 */
static struct page * rmqueue(zone_t *zone, unsigned int order)
{
	/* Start at the free_area entry for the requested order. */
	free_area_t *bucket = zone->free_area + order;
	unsigned int bucket_order = order;
	unsigned long irq_flags;

	spin_lock_irqsave(&zone->lock, irq_flags);
	do {
		struct list_head *anchor = &bucket->free_list;
		struct list_head *entry = memlist_next(anchor);
		struct page *page;
		unsigned int index;

		if (entry == anchor) {
			/* This list is empty: look in the next larger order. */
			bucket_order++;
			bucket++;
			continue;
		}

		/* First page of the free block; an invalid address triggers BUG(). */
		page = memlist_entry(entry, struct page, list);
		if (BAD_RANGE(zone,page))
			BUG();

		/* Unlink the block that is being handed out. */
		memlist_del(entry);
		index = page - zone->zone_mem_map;

		/* Record the allocation in the free_area bitmap. */
		if (bucket_order != MAX_ORDER-1)
			MARK_USED(index, bucket_order, bucket);
		zone->free_pages -= 1UL << order;

		/*
		 * If the block is larger than requested, expand() returns the
		 * part to hand out and links the remainder into the matching
		 * free lists.
		 */
		page = expand(zone, page, index, order, bucket_order, bucket);
		spin_unlock_irqrestore(&zone->lock, irq_flags);

		set_page_count(page, 1);
		if (BAD_RANGE(zone,page))
			BUG();
		if (PageLRU(page))
			BUG();
		if (PageActive(page))
			BUG();
		return page;
	} while (bucket_order < MAX_ORDER);
	spin_unlock_irqrestore(&zone->lock, irq_flags);

	return NULL;
}
/*
 * expand - split a free block down to the requested size.
 * @zone:  zone that contains the allocated block
 * @page:  the allocated block
 * @index: index of the block in mem_map
 * @low:   the wanted block size is 2^low pages
 * @high:  the block actually taken from the free lists is 2^high pages
 * @area:  free_area entry the block was taken from
 *
 * On each step the block is halved: the lower half is linked into the free
 * list one order down and the upper half is kept for further splitting.
 * Returns the page that starts the remaining 2^low block.
 */
static inline struct page * expand (zone_t *zone, struct page *page,
	unsigned long index, int low, int high, free_area_t * area)
{
	/* Starts out as the number of pages in the block that was taken. */
	unsigned long npages = 1 << high;

	for (; high > low; index += npages, page += npages) {
		if (BAD_RANGE(zone,page))
			BUG();

		/* Step down one order: half the size, previous free_area entry. */
		area--;
		high--;
		npages >>= 1;

		/* Put the lower half on the smaller free list and update its bitmap. */
		memlist_add_head(&page->list, &area->free_list);
		MARK_USED(index, high, area);
	}

	if (BAD_RANGE(zone,page))
		BUG();
	return page;
}
/*
* Descriptor for one slab.
* Field notes are translated from the original annotations.
*/
typedef struct slab_s {
struct list_head list;
unsigned long colouroff; /* size of the colouring area on this slab */
void *s_mem; /* start of the object area */
unsigned int inuse; /* number of allocated objects */
kmem_bufctl_t free; /* first object on the free-object chain */
} slab_t;
struct kmem_cache_s {
/* 1) each alloc & free */
/* full, partial first, then free */
struct list_head slabs_full;
struct list_head slabs_partial;
struct list_head slabs_free;
unsigned int objsize;
原始的数据结构的大小.初始化为kemem_cache_t的大小unsigned int flags; /* constant flags */
unsigned int num; //
每个slab obj的个数spinlock_t spinlock;
#ifdef CONFIG_SMP
unsigned int batchcount;
#endif
/* 2) slab additions /removals */
/* order of pgs per slab (2^n) */
unsigned int gfporder;//
gfporder 则表示每个 Slab 大小的对数,即每个 Slab 由 2 gfporder 个页面构成。/* force GFP flags, e.g. GFP_DMA */
unsigned int gfpflags;
size_t colour; /* 颜色数目 */
unsigned int colour_off; /*颜色的偏移量 */
unsigned int colour_next; /* 下一个slab将要使用的颜色 */
kmem_cache_t *slabp_cache;
unsigned int growing;
unsigned int dflags; /* dynamic flags */
/* constructor func */
void (*ctor)(void *, kmem_cache_t *, unsigned long);
/* de-constructor func */
void (*dtor)(void *, kmem_cache_t *, unsigned long);
unsigned long failures;
/* 3) cache creation/removal */
char name[CACHE_NAMELEN];
struct list_head next;
#ifdef CONFIG_SMP
/* 4) per-cpu data */
cpucache_t *cpudata[NR_CPUS];
#endif
…..
};
/*
 * Statically initialized kmem_cache_t instance named "kmem_cache".
 * Its three slab lists start out empty and its object size is that of
 * kmem_cache_t itself.
 */
static kmem_cache_t cache_cache = {
	.slabs_full	= LIST_HEAD_INIT(cache_cache.slabs_full),
	.slabs_partial	= LIST_HEAD_INIT(cache_cache.slabs_partial),
	.slabs_free	= LIST_HEAD_INIT(cache_cache.slabs_free),
	.objsize	= sizeof(kmem_cache_t),	/* size of the cached data structure */
	.flags		= SLAB_NO_REAP,
	.spinlock	= SPIN_LOCK_UNLOCKED,
	.colour_off	= L1_CACHE_BYTES,
	.name		= "kmem_cache",
};
/*
 * kmem_cache_create - create a slab cache.
 * @name:    cache name
 * @size:    object size
 * @offset:  requested colouring offset
 * @c_flags: cache flags; per the original note, SLAB_HWCACHE_ALIGN asks for
 *           alignment to first-level hardware cache line boundaries
 * @ctor:    object constructor (objp: the object, cachep: its cache); usually NULL
 * @dtor:    object destructor; usually NULL
 */
kmem_cache_t *kmem_cache_create(const char *name, size_t size, size_t offset,
	unsigned long c_flags,
	void (*ctor) (void *objp, kmem_cache_t *cachep, unsigned long flags),
	void (*dtor) (void *objp, kmem_cache_t *cachep, unsigned long flags));

/* General-purpose allocation and release interface. */
void *kmalloc(size_t size, int flags);
void kfree(const void *objp);
/*
* Descriptor for one non-contiguous memory area.
* Field notes are translated from the original annotations.
*/
struct vm_struct {
unsigned long flags;
void * addr; /* start address of the memory area */
unsigned long size; /* size of the area plus 4096 (the size of the guard area) */
struct vm_struct * next;
};
struct vm_struct * vmlist; /* the non-contiguous areas form a singly linked list */
标签:flush 位置 mon creation pac any lis additions support
原文地址:http://www.cnblogs.com/zengyiwen/p/5fd4435a0f2f98a8fd9d4551c42d49f6.html