内存管理---伙伴系统---
2016-05-07 21:15
267 查看
内核使用伙伴系统来解决内存分配引起的外部碎片问题。
一、数据结构描述
struct page {
。。。。。
struct list_head lru; /* Pageout list, eg. active_list
* protected by zone->lru_lock !
*/
。。。。。
}
对于:
free_area中的链表数组;
#define MIGRATE_UNMOVABLE 0
#define MIGRATE_RECLAIMABLE 1
#define MIGRATE_MOVABLE 2
#define MIGRATE_PCPTYPES 3 /* the number of types on the pcp lists */
#define MIGRATE_RESERVE 3
#define MIGRATE_ISOLATE 4 /* can't allocate from here */
#define MIGRATE_TYPES 5
MIGRATE_PCPTYPES是per_cpu_pageset,即用来表示每CPU页框高速缓存的数据结构中的链表的迁移类型数目
MIGRATE_RESERVE是在前三种的列表中都没用可满足分配的内存块时,就可以从MIGRATE_RESERVE分配
MIGRATE_ISOLATE用于跨越NUMA节点移动物理内存页,在大型系统上,它有益于将物理内存页移动到接近于是用该页最频繁地CPU
MIGRATE_TYPES表示迁移类型的数目
当一个指定的迁移类型所对应的链表中没有空闲块时,将会按以下定义的顺序到其他迁移类型的链表中寻找
伙伴系统的初始化:
在初始化物理管理区的时候初始化伙伴系统的,具体实现在下面的函数中:
Start_kernel()->setup_arch()->paging_init()->zone_sizes_init()->free_area_init_nodes()->free_area_init_node()->free_area_init_core()->init_currently_empty_zone()->zone_init_free_lists();
static void __meminit zone_init_free_lists(struct zone *zone)
{
int order, t;
for_each_migratetype_order(order, t) {//宏替换 两层循环
INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
zone->free_area[order].nr_free = 0;
}
}
伙伴系统中数据初始化
将bootmem分配器中的数据回收到伙伴系统中
start_kernel()->mm_init()->mem_init
/*
* Set up kernel memory allocators
*/
static void __init mm_init(void)
{
/*
* page_cgroup requires contiguous pages,
* bigger than MAX_ORDER unless SPARSEMEM.
*/
page_cgroup_init_flatmem();
mem_init();
kmem_cache_init();//salb 高速缓存初始化
percpu_init_late();每cpu变量??
pgtable_cache_init();
vmalloc_init();//非连续内存管理 用到
}
init_32.c中
void __init mem_init(void)
{
int codesize, reservedpages, datasize, initsize;
int tmp;
pci_iommu_alloc(); //pci相关
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif
/*
* With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to
* be done before free_all_bootmem(). Memblock use free low memory for
* temporary data (see find_range_array()) and for this purpose can use
* pages that was already passed to the buddy allocator, hence marked as
* not accessible in the page tables when compiled with
* CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not
* important here.
*/
set_highmem_pages_init();
/* this will put all low memory onto the freelists */
<span style="white-space:pre"> /*释放bootmem中的内存到伙伴系统中,包括bootmem占有的位图
返回总共释放的页面数**/ </span>
totalram_pages += free_all_bootmem();
reservedpages = 0;
for (tmp = 0; tmp < max_low_pfn; tmp++)
/*
* Only count reserved RAM pages:
*/
if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
reservedpages++;
<span style="white-space:pre"> /*内核代码段、数据段、初始化端长度*/ </span>
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
nr_free_pages() << (PAGE_SHIFT-10),
num_physpages << (PAGE_SHIFT-10),
codesize >> 10,
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
initsize >> 10,
totalhigh_pages << (PAGE_SHIFT-10));
printk(KERN_INFO "virtual kernel memory layout:\n"
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
" pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#endif
" vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
" lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
" .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
" .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
" .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
FIXADDR_START, FIXADDR_TOP,
(FIXADDR_TOP - FIXADDR_START) >> 10,
#ifdef CONFIG_HIGHMEM
PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
(LAST_PKMAP*PAGE_SIZE) >> 10,
#endif
VMALLOC_START, VMALLOC_END,
(VMALLOC_END - VMALLOC_START) >> 20,
(unsigned long)__va(0), (unsigned long)high_memory,
((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
(unsigned long)&__init_begin, (unsigned long)&__init_end,
((unsigned long)&__init_end -
(unsigned long)&__init_begin) >> 10,
(unsigned long)&_etext, (unsigned long)&_edata,
((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
(unsigned long)&_text, (unsigned long)&_etext,
((unsigned long)&_etext - (unsigned long)&_text) >> 10);
/*
* Check boundaries twice: Some fundamental inconsistencies can
* be detected at build time already.
*/
#define __FIXADDR_TOP (-PAGE_SIZE)
#ifdef CONFIG_HIGHMEM
BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
#endif
#define high_memory (-128UL << 20)
BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
#undef high_memory
#undef __FIXADDR_TOP
#ifdef CONFIG_HIGHMEM
BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
BUG_ON(VMALLOC_END > PKMAP_BASE);
#endif
BUG_ON(VMALLOC_START >= VMALLOC_END);
BUG_ON((unsigned long)high_memory > VMALLOC_START);
if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();
}
其中会调用的几个函数:
/**
* free_all_bootmem - release free pages to the buddy allocator
*
* Returns the number of pages actually released.
*/
unsigned long __init free_all_bootmem(void)
{
unsigned long total_pages = 0;
bootmem_data_t *bdata;
list_for_each_entry(bdata, &bdata_list, list)
total_pages += free_all_bootmem_core(bdata);
return total_pages;
}
一、数据结构描述
struct zone { <span style="white-space:pre"> </span>/**********/ struct free_area free_area[MAX_ORDER]; 每一种元素对应一种块的大小。 <span style="white-space:pre"> </span>/*********/ };
struct free_area { struct list_head free_list[MIGRATE_TYPES]; unsigned long nr_free; };free_area共有MAX_ORDER个元素,其中第order个元素记录了2^order的空闲块,这些空闲块在free_list中以双向链表的形式组织起来,对于同等大小的空闲块,其类型不同,将组织在不同的free_list中,这些节点对应的着struct page中的lru域;nr_free制定了大小为2^k页的空闲块的个数;
struct page {
。。。。。
struct list_head lru; /* Pageout list, eg. active_list
* protected by zone->lru_lock !
*/
。。。。。
}
对于:
free_area中的链表数组;
#define MIGRATE_UNMOVABLE 0
#define MIGRATE_RECLAIMABLE 1
#define MIGRATE_MOVABLE 2
#define MIGRATE_PCPTYPES 3 /* the number of types on the pcp lists */
#define MIGRATE_RESERVE 3
#define MIGRATE_ISOLATE 4 /* can't allocate from here */
#define MIGRATE_TYPES 5
MIGRATE_PCPTYPES是per_cpu_pageset,即用来表示每CPU页框高速缓存的数据结构中的链表的迁移类型数目
MIGRATE_RESERVE是在前三种的列表中都没用可满足分配的内存块时,就可以从MIGRATE_RESERVE分配
MIGRATE_ISOLATE用于跨越NUMA节点移动物理内存页,在大型系统上,它有益于将物理内存页移动到接近于是用该页最频繁地CPU
MIGRATE_TYPES表示迁移类型的数目
当一个指定的迁移类型所对应的链表中没有空闲块时,将会按以下定义的顺序到其他迁移类型的链表中寻找
static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ };
伙伴系统的初始化:
在初始化物理管理区的时候初始化伙伴系统的,具体实现在下面的函数中:
Start_kernel()->setup_arch()->paging_init()->zone_sizes_init()->free_area_init_nodes()->free_area_init_node()->free_area_init_core()->init_currently_empty_zone()->zone_init_free_lists();
static void __meminit zone_init_free_lists(struct zone *zone)
{
int order, t;
for_each_migratetype_order(order, t) {//宏替换 两层循环
INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
zone->free_area[order].nr_free = 0;
}
}
#define for_each_migratetype_order(order, type) \ for (order = 0; order < MAX_ORDER; order++) \ for (type = 0; type < MIGRATE_TYPES; type++)
伙伴系统中数据初始化
将bootmem分配器中的数据回收到伙伴系统中
start_kernel()->mm_init()->mem_init
/*
* Set up kernel memory allocators
*/
static void __init mm_init(void)
{
/*
* page_cgroup requires contiguous pages,
* bigger than MAX_ORDER unless SPARSEMEM.
*/
page_cgroup_init_flatmem();
mem_init();
kmem_cache_init();//salb 高速缓存初始化
percpu_init_late();每cpu变量??
pgtable_cache_init();
vmalloc_init();//非连续内存管理 用到
}
init_32.c中
void __init mem_init(void)
{
int codesize, reservedpages, datasize, initsize;
int tmp;
pci_iommu_alloc(); //pci相关
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif
/*
* With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to
* be done before free_all_bootmem(). Memblock use free low memory for
* temporary data (see find_range_array()) and for this purpose can use
* pages that was already passed to the buddy allocator, hence marked as
* not accessible in the page tables when compiled with
* CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not
* important here.
*/
set_highmem_pages_init();
/* this will put all low memory onto the freelists */
<span style="white-space:pre"> /*释放bootmem中的内存到伙伴系统中,包括bootmem占有的位图
返回总共释放的页面数**/ </span>
totalram_pages += free_all_bootmem();
reservedpages = 0;
for (tmp = 0; tmp < max_low_pfn; tmp++)
/*
* Only count reserved RAM pages:
*/
if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
reservedpages++;
<span style="white-space:pre"> /*内核代码段、数据段、初始化端长度*/ </span>
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
nr_free_pages() << (PAGE_SHIFT-10),
num_physpages << (PAGE_SHIFT-10),
codesize >> 10,
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
initsize >> 10,
totalhigh_pages << (PAGE_SHIFT-10));
printk(KERN_INFO "virtual kernel memory layout:\n"
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
" pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#endif
" vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
" lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
" .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
" .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
" .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
FIXADDR_START, FIXADDR_TOP,
(FIXADDR_TOP - FIXADDR_START) >> 10,
#ifdef CONFIG_HIGHMEM
PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
(LAST_PKMAP*PAGE_SIZE) >> 10,
#endif
VMALLOC_START, VMALLOC_END,
(VMALLOC_END - VMALLOC_START) >> 20,
(unsigned long)__va(0), (unsigned long)high_memory,
((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
(unsigned long)&__init_begin, (unsigned long)&__init_end,
((unsigned long)&__init_end -
(unsigned long)&__init_begin) >> 10,
(unsigned long)&_etext, (unsigned long)&_edata,
((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
(unsigned long)&_text, (unsigned long)&_etext,
((unsigned long)&_etext - (unsigned long)&_text) >> 10);
/*
* Check boundaries twice: Some fundamental inconsistencies can
* be detected at build time already.
*/
#define __FIXADDR_TOP (-PAGE_SIZE)
#ifdef CONFIG_HIGHMEM
BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
#endif
#define high_memory (-128UL << 20)
BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
#undef high_memory
#undef __FIXADDR_TOP
#ifdef CONFIG_HIGHMEM
BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
BUG_ON(VMALLOC_END > PKMAP_BASE);
#endif
BUG_ON(VMALLOC_START >= VMALLOC_END);
BUG_ON((unsigned long)high_memory > VMALLOC_START);
if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();
}
其中会调用的几个函数:
/**
* free_all_bootmem - release free pages to the buddy allocator
*
* Returns the number of pages actually released.
*/
unsigned long __init free_all_bootmem(void)
{
unsigned long total_pages = 0;
bootmem_data_t *bdata;
list_for_each_entry(bdata, &bdata_list, list)
total_pages += free_all_bootmem_core(bdata);
return total_pages;
}
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) { struct page *page; unsigned long start, end, pages, count = 0; if (!bdata->node_bootmem_map) return 0; <span style="white-space:pre"> /*节点内存开始和结束处*/ </span> start = bdata->node_min_pfn; end = bdata->node_low_pfn; bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data, start, end); while (start < end) {/*释放整个bootmem所涉及的内存*/ unsigned long *map, idx, vec; map = bdata->node_bootmem_map; idx = start - bdata->node_min_pfn;/*相对于开始处的偏移*/ vec = ~map[idx / BITS_PER_LONG];/*vec值为页面分配情况*/ <span style="white-space:pre"> /*如果开始地址以BITS_PER_LONG</span><span style="white-space:pre">位对其、连续的BITS_PER_LONG个页面都没有被分配(空闲),并且 释放起点以上的BITS_PER_LONG个页面都是合法的(不超过end值),则释放连续的BITS_PER_LONG个 页面,*/ </span> /* * If we have a properly aligned and fully unreserved * BITS_PER_LONG block of pages in front of us, free * it in one go. */ if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) { int order = ilog2(BITS_PER_LONG); __free_pages_bootmem(pfn_to_page(start), order);/*释放到伙伴系统中*/ count += BITS_PER_LONG; start += BITS_PER_LONG; } else { unsigned long off = 0; vec >>= start & (BITS_PER_LONG - 1); while (vec) { if (vec & 1) {/*如果页面空闲 注意一开始vec的值是经过取反后得来的*/ page = pfn_to_page(start + off); __free_pages_bootmem(page, 0); count++;/*更新释放页面总数*/ } vec >>= 1;/*vec向右移动一位,表示访问下一个页面*/ off++; } start = ALIGN(start + 1, BITS_PER_LONG); } } <span style="white-space:pre"> /*虚拟地址转化为page 用于释放bdata中的位图所占有的内存*/ </span> page = virt_to_page(bdata->node_bootmem_map); pages = bdata->node_low_pfn - bdata->node_min_pfn; pages = bootmem_bootmap_pages(pages); /*计算bootmem分配器中所使用的页面数,即位图使用的页面数*/ count += pages; while (pages--)//每次释放一个页面 __free_pages_bootmem(page++, 0); bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count); return count; }
void __meminit __free_pages_bootmem(struct page *page, unsigned int order) { unsigned int nr_pages = 1 << order; unsigned int loop; prefetchw(page);????????? for (loop = 0; loop < nr_pages; loop++) { struct page *p = &page[loop]; if (loop + 1 < nr_pages) prefetchw(p + 1); __ClearPageReserved(p); set_page_count(p, 0);/*设置页面的引用位为0*/ } set_page_refcounted(page);/*设置页面的引用计数为1*/ __free_pages(page, order);/*释放页面*/ }
void __init set_highmem_pages_init(void) { struct zone *zone; int nid; for_each_zone(zone) { unsigned long zone_start_pfn, zone_end_pfn; if (!is_highmem(zone))/*验证是否属于高端内存区域中*/ continue; <span style="white-space:pre"> /*记录高端内存管理区的起始页框号和结束页框号*/ </span> zone_start_pfn = zone->zone_start_pfn; zone_end_pfn = zone_start_pfn + zone->spanned_pages; nid = zone_to_nid(zone); printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", zone->name, nid, zone_start_pfn, zone_end_pfn); <span style="white-space:pre"> /*将高端内存的页框添加到伙伴系统*/ </span> add_highpages_with_active_regions(nid, zone_start_pfn, zone_end_pfn); } totalram_pages += totalhigh_pages;
void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, unsigned long end_pfn) { phys_addr_t start, end; u64 i; for_each_free_mem_range(i, nid, &start, &end, NULL) { unsigned long pfn = clamp_t(unsigned long, PFN_UP(start), start_pfn, end_pfn); unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end), start_pfn, end_pfn); for ( ; pfn < e_pfn; pfn++) if (pfn_valid(pfn)) add_one_highpage_init(pfn_to_page(pfn)); } }
相关文章推荐
- POJ 2942 Knights of the Round Table(点双联通+二分图+染色)
- Qt应用程序以管理员身份启动
- 机器学习中模型评估与选择中的几个小问题
- 最大子段和的以为与二维求解
- Catalan数(卡特兰数)
- Web前端性能优化(五)网站样式和脚本
- oracle日志
- Mysql DATE_FORMAT函数用法
- python中的赋值
- mysql添加唯一约束语句
- 大数据流处理(SparkStreaming)核心源码解读以StreamingContext为主
- python网络应用入门:网络爬虫的使用
- mysql优化limit查询语句的5个方法
- Linux crontab定时执行任务
- namespace 命名空间
- 简单的ADSL登录界面
- BZOJ2440: [中山市选2011]完全平方数dizh
- RMQ算法
- Java开发中的23种设计模式详解
- MD之材料设计库(一)