您的位置:首页 > 其它

内存管理---伙伴系统---

2016-05-07 21:15 267 查看
内核使用伙伴系统来解决内存分配引起的外部碎片问题。

一、数据结构描述

struct zone {
<span style="white-space:pre">	</span>/**********/
struct free_area	free_area[MAX_ORDER]; 每一种元素对应一种块的大小。
<span style="white-space:pre">	</span>/*********/
};

/*
 * Per-order bookkeeping for the buddy allocator: one free list per
 * migrate type, plus a count of free blocks of this order.
 */
struct free_area {
struct list_head	free_list[MIGRATE_TYPES]; /* one doubly-linked list per migrate type; nodes are page->lru */
unsigned long		nr_free; /* number of free blocks of this order (all migrate types combined) */
};
free_area共有MAX_ORDER个元素,其中第order个元素记录了大小为2^order页的空闲块,这些空闲块在free_list中以双向链表的形式组织起来;对于同等大小的空闲块,若迁移类型不同,则组织在不同的free_list中,链表节点对应着struct page中的lru域;nr_free指定了大小为2^order页的空闲块的个数。
/*
 * Excerpt of struct page: only the lru member is shown. When a page
 * block sits on a buddy free list, this list_head links it into
 * free_area[order].free_list[migratetype].
 */
struct page {
	/* ... other members omitted ... */
	struct list_head lru; /* Pageout list, eg. active_list
			       * protected by zone->lru_lock !
			       */
	/* ... other members omitted ... */
};


对于:
free_area中的链表数组;

#define MIGRATE_UNMOVABLE 0 /* pages fixed in memory (e.g. core kernel allocations) */
#define MIGRATE_RECLAIMABLE 1 /* pages that cannot move but can be reclaimed */
#define MIGRATE_MOVABLE 2 /* pages that can be migrated freely (e.g. user pages) */
#define MIGRATE_PCPTYPES 3 /* the number of types on the pcp lists */
#define MIGRATE_RESERVE 3 /* emergency reserve, used when the first three types are exhausted */
#define MIGRATE_ISOLATE 4 /* can't allocate from here */
#define MIGRATE_TYPES 5 /* total number of migrate types */

MIGRATE_PCPTYPES是per_cpu_pageset,即用来表示每CPU页框高速缓存的数据结构中的链表的迁移类型数目

MIGRATE_RESERVE是在前三种类型的列表中都没有可满足分配的内存块时,才从MIGRATE_RESERVE列表中分配

MIGRATE_ISOLATE用于跨越NUMA节点移动物理内存页,在大型系统上,它有益于将物理内存页移动到接近于使用该页最频繁的CPU

MIGRATE_TYPES表示迁移类型的数目

当一个指定的迁移类型所对应的链表中没有空闲块时,将会按以下定义的顺序到其他迁移类型的链表中寻找
/*
 * Fallback order used when the requested migrate type has no free block:
 * each row lists, in order, the other migrate types to try.
 * MIGRATE_RESERVE's row is a placeholder and is never consulted.
 */
static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_RESERVE },
[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_RESERVE },
[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
[MIGRATE_RESERVE]     = { MIGRATE_RESERVE,     MIGRATE_RESERVE,   MIGRATE_RESERVE }, /* Never used */
};




伙伴系统的初始化:

在初始化物理管理区的时候初始化伙伴系统的,具体实现在下面的函数中:

start_kernel()->setup_arch()->paging_init()->zone_sizes_init()->free_area_init_nodes()->free_area_init_node()->free_area_init_core()->init_currently_empty_zone()->zone_init_free_lists();

/*
 * Initialize the buddy free lists of a zone: every free_list of every
 * order/migrate-type pair becomes an empty list and every order's
 * free-block count is reset to zero. Equivalent to iterating with
 * for_each_migratetype_order(), written out as explicit nested loops.
 */
static void __meminit zone_init_free_lists(struct zone *zone)
{
	int order, t;

	for (order = 0; order < MAX_ORDER; order++) {
		for (t = 0; t < MIGRATE_TYPES; t++) {
			INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
			zone->free_area[order].nr_free = 0;
		}
	}
}
/*
 * Iterate over every (order, migrate-type) pair: the outer loop walks
 * all allocation orders, the inner loop all migrate types.
 */
#define for_each_migratetype_order(order, type) \
for (order = 0; order < MAX_ORDER; order++) \
for (type = 0; type < MIGRATE_TYPES; type++)


伙伴系统中数据初始化

将bootmem分配器中的数据回收到伙伴系统中

start_kernel()->mm_init()->mem_init

/*
 * Set up kernel memory allocators
 */
static void __init mm_init(void)
{
	/*
	 * page_cgroup requires contiguous pages,
	 * bigger than MAX_ORDER unless SPARSEMEM.
	 */
	page_cgroup_init_flatmem();
	mem_init();		/* hand boot-time memory over to the buddy allocator */
	kmem_cache_init();	/* slab cache initialization */
	percpu_init_late();	/* per-CPU variable setup -- original note was unsure; TODO confirm */
	pgtable_cache_init();
	vmalloc_init();		/* used by the non-contiguous (vmalloc) memory manager */
}

init_32.c中

/*
 * Finish memory initialization on 32-bit x86: release all boot-time
 * memory to the buddy allocator, count reserved pages, and print the
 * kernel's virtual memory layout.
 */
void __init mem_init(void)
{
	int codesize, reservedpages, datasize, initsize;
	int tmp;

	pci_iommu_alloc();	/* PCI / IOMMU related setup */

#ifdef CONFIG_FLATMEM
	BUG_ON(!mem_map);
#endif
	/*
	 * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to
	 * be done before free_all_bootmem(). Memblock use free low memory for
	 * temporary data (see find_range_array()) and for this purpose can use
	 * pages that was already passed to the buddy allocator, hence marked as
	 * not accessible in the page tables when compiled with
	 * CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not
	 * important here.
	 */
	set_highmem_pages_init();

	/* this will put all low memory onto the freelists */
	/*
	 * Release bootmem-managed memory (including the pages occupied by
	 * the bootmem bitmap itself) to the buddy system; returns the
	 * total number of pages freed.
	 */
	totalram_pages += free_all_bootmem();

	reservedpages = 0;
	for (tmp = 0; tmp < max_low_pfn; tmp++)
		/*
		 * Only count reserved RAM pages:
		 */
		if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
			reservedpages++;

	/* lengths of the kernel code, data and init sections */
	codesize = (unsigned long) &_etext - (unsigned long) &_text;
	datasize = (unsigned long) &_edata - (unsigned long) &_etext;
	initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
		"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
		nr_free_pages() << (PAGE_SHIFT-10),
		num_physpages << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10,
		totalhigh_pages << (PAGE_SHIFT-10));

	printk(KERN_INFO "virtual kernel memory layout:\n"
		" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
		" pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#endif
		" vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
		" lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
		" .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
		" .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
		" .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
		FIXADDR_START, FIXADDR_TOP,
		(FIXADDR_TOP - FIXADDR_START) >> 10,

#ifdef CONFIG_HIGHMEM
		PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
		(LAST_PKMAP*PAGE_SIZE) >> 10,
#endif

		VMALLOC_START, VMALLOC_END,
		(VMALLOC_END - VMALLOC_START) >> 20,

		(unsigned long)__va(0), (unsigned long)high_memory,
		((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,

		(unsigned long)&__init_begin, (unsigned long)&__init_end,
		((unsigned long)&__init_end -
		 (unsigned long)&__init_begin) >> 10,

		(unsigned long)&_etext, (unsigned long)&_edata,
		((unsigned long)&_edata - (unsigned long)&_etext) >> 10,

		(unsigned long)&_text, (unsigned long)&_etext,
		((unsigned long)&_etext - (unsigned long)&_text) >> 10);

	/*
	 * Check boundaries twice: Some fundamental inconsistencies can
	 * be detected at build time already.
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
#ifdef CONFIG_HIGHMEM
	BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
	BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
#endif
#define high_memory (-128UL << 20)
	BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
#undef high_memory
#undef __FIXADDR_TOP

#ifdef CONFIG_HIGHMEM
	BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
	BUG_ON(VMALLOC_END > PKMAP_BASE);
#endif
	BUG_ON(VMALLOC_START >= VMALLOC_END);
	BUG_ON((unsigned long)high_memory > VMALLOC_START);

	if (boot_cpu_data.wp_works_ok < 0)
		test_wp_bit();
}
其中会调用的几个函数:

/**
* free_all_bootmem - release free pages to the buddy allocator
*
* Returns the number of pages actually released.
*/
unsigned long __init free_all_bootmem(void)
{
unsigned long total_pages = 0;
bootmem_data_t *bdata;

/* walk every registered bootmem node descriptor and release its pages */
list_for_each_entry(bdata, &bdata_list, list)
total_pages += free_all_bootmem_core(bdata);

return total_pages;
}
/*
 * Release every free page tracked by one bootmem node descriptor to the
 * buddy allocator, then release the pages holding the bootmem bitmap
 * itself. Returns the total number of pages freed.
 */
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
	struct page *page;
	unsigned long start, end, pages, count = 0;

	if (!bdata->node_bootmem_map)
		return 0;

	/* first and one-past-last page frame covered by this node */
	start = bdata->node_min_pfn;
	end = bdata->node_low_pfn;

	bdebug("nid=%td start=%lx end=%lx\n",
		bdata - bootmem_node_data, start, end);

	while (start < end) {	/* release the whole range covered by bootmem */
		unsigned long *map, idx, vec;

		map = bdata->node_bootmem_map;
		idx = start - bdata->node_min_pfn;	/* offset from the node start */
		vec = ~map[idx / BITS_PER_LONG];	/* inverted bitmap word: set bit == page is free */

		/*
		 * If we have a properly aligned and fully unreserved
		 * BITS_PER_LONG block of pages in front of us, free
		 * it in one go.
		 */
		if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) {
			int order = ilog2(BITS_PER_LONG);

			__free_pages_bootmem(pfn_to_page(start), order);	/* hand the block to the buddy system */
			count += BITS_PER_LONG;
			start += BITS_PER_LONG;
		} else {
			unsigned long off = 0;

			vec >>= start & (BITS_PER_LONG - 1);
			while (vec) {
				if (vec & 1) {	/* page is free (note vec was inverted above) */
					page = pfn_to_page(start + off);
					__free_pages_bootmem(page, 0);
					count++;	/* update the total of freed pages */
				}
				vec >>= 1;	/* shift right by one: advance to the next page */
				off++;
			}
			start = ALIGN(start + 1, BITS_PER_LONG);
		}
	}

	/*
	 * Convert the bitmap's virtual address to its struct page and
	 * free the pages occupied by the bootmem bitmap itself.
	 */
	page = virt_to_page(bdata->node_bootmem_map);
	pages = bdata->node_low_pfn - bdata->node_min_pfn;
	pages = bootmem_bootmap_pages(pages);	/* number of pages used by the bitmap */
	count += pages;
	while (pages--)		/* one page at a time */
		__free_pages_bootmem(page++, 0);

	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);

	return count;
}


/*
 * Release a 2^order block of boot-time pages to the buddy allocator:
 * clear the Reserved flag and zero the refcount of every page in the
 * block, then give the head page a single reference and free it.
 */
void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
{
	unsigned int nr_pages = 1 << order;
	unsigned int loop;

	prefetchw(page);	/* prefetch-for-write hint; original author was unsure of its purpose */
	for (loop = 0; loop < nr_pages; loop++) {
		struct page *p = &page[loop];

		if (loop + 1 < nr_pages)
			prefetchw(p + 1);
		__ClearPageReserved(p);
		set_page_count(p, 0);	/* reset the page's reference count to 0 */
	}

	set_page_refcounted(page);	/* set the head page's reference count to 1 */
	__free_pages(page, order);	/* release the block */
}

/*
 * Hand all highmem page frames to the buddy allocator and account
 * them in totalram_pages.
 * NOTE(review): the original text was missing the function's closing
 * brace; it is restored here.
 */
void __init set_highmem_pages_init(void)
{
	struct zone *zone;
	int nid;

	for_each_zone(zone) {
		unsigned long zone_start_pfn, zone_end_pfn;

		if (!is_highmem(zone))	/* only highmem zones are handled here */
			continue;

		/* first and one-past-last page frame of this highmem zone */
		zone_start_pfn = zone->zone_start_pfn;
		zone_end_pfn = zone_start_pfn + zone->spanned_pages;

		nid = zone_to_nid(zone);
		printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n",
			zone->name, nid, zone_start_pfn, zone_end_pfn);

		/* add the highmem page frames to the buddy system */
		add_highpages_with_active_regions(nid, zone_start_pfn,
						  zone_end_pfn);
	}
	totalram_pages += totalhigh_pages;
}

/*
 * Register with the buddy allocator every valid highmem page frame in
 * [start_pfn, end_pfn) that lies inside a free memblock range of node
 * @nid, one page at a time via add_one_highpage_init().
 */
void __init add_highpages_with_active_regions(int nid,
unsigned long start_pfn, unsigned long end_pfn)
{
phys_addr_t start, end;
u64 i;

/* iterate over the free memory ranges recorded by memblock for @nid */
for_each_free_mem_range(i, nid, &start, &end, NULL) {
/* clip the range to [start_pfn, end_pfn), converting addresses to page frames */
unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
start_pfn, end_pfn);
unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
start_pfn, end_pfn);
for ( ; pfn < e_pfn; pfn++)
if (pfn_valid(pfn))
add_one_highpage_init(pfn_to_page(pfn));
}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: