Linux内存分配alloc_page和__get_free_page详注(伙伴管理系统Buddy)
2014-02-23 00:10
519 查看
alloc_page和__get_free_page都是从Buddy分配页面,只是最终返回值类型不同而已,前者返回page指针,后者返回该page所在的虚拟地址。
两者最终都会调用到核心函数__alloc_pages_nodemask,下面详述该函数的处理流程。
/*
 * __alloc_pages_nodemask - common entry point reached by both alloc_page()
 * and __get_free_page() when allocating from the buddy allocator.
 *
 * @gfp_mask: allocation flags; masked with gfp_allowed_mask before use
 * @order:    allocation order passed through to the fast and slow paths
 * @zonelist: candidate zones to allocate from
 * @nodemask: optional node restriction handed to the zonelist walkers
 *
 * Returns the page produced by the fast path (get_page_from_freelist) or,
 * if that fails, by __alloc_pages_slowpath(); NULL when no page is obtained.
 */
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
			struct zonelist *zonelist, nodemask_t *nodemask)
{
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	struct zone *preferred_zone;
	struct page *page;
	/*
	 * gfp flags and migrate types do not map one-to-one, so the
	 * migrate type is derived explicitly here (computed from the
	 * caller's flags, before gfp_mask is masked below).
	 */
	int migratetype = allocflags_to_migratetype(gfp_mask);

	gfp_mask &= gfp_allowed_mask;

	lockdep_trace_alloc(gfp_mask);

	/*
	 * Only armed when the allocation is allowed to wait (__GFP_WAIT).
	 * NOTE(review): the original annotation says this may voluntarily
	 * reschedule the task; that depends on kernel config - confirm.
	 */
	might_sleep_if(gfp_mask & __GFP_WAIT);

	/*
	 * Per the original annotation: failure-injection hook used for
	 * debugging when CONFIG_FAIL_PAGE_ALLOC is enabled.
	 */
	if (should_fail_alloc_page(gfp_mask, order))
		return NULL;

	/*
	 * Check the zones suitable for the gfp_mask contain at least one
	 * valid zone. It's possible to have an empty zonelist as a result
	 * of GFP_THISNODE and a memoryless node
	 */
	if (unlikely(!zonelist->_zonerefs->zone))
		return NULL;

	/*
	 * Paired with put_mems_allowed() on every exit path below.
	 * Per the original annotation: pins the allowed-memory policy so it
	 * cannot be changed while we allocate.
	 */
	get_mems_allowed();

	/* The preferred zone is used for statistics later */
	first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
	if (!preferred_zone) {
		/* No usable zone: drop the policy reference and bail out */
		put_mems_allowed();
		return NULL;
	}

	/*
	 * First allocation attempt (fast path): checked against the LOW
	 * watermark (ALLOC_WMARK_LOW) with cpuset enforcement (ALLOC_CPUSET
	 * plus __GFP_HARDWALL).
	 */
	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
			preferred_zone, migratetype);

	/*
	 * Fast path failed: fall back to the slow path. Per the original
	 * annotation it starts reclaim and retries with relaxed watermarks.
	 */
	if (unlikely(!page))
		page = __alloc_pages_slowpath(gfp_mask, order,
				zonelist, high_zoneidx, nodemask,
				preferred_zone, migratetype);

	put_mems_allowed();

	/* Tracepoint; fires for failed (NULL) allocations as well */
	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
	return page;
}
/*
 * get_page_from_freelist goes through the zonelist trying to allocate
 * a page.
 *
 * @gfp_mask:       allocation flags, forwarded to the cpuset check, zone
 *                  reclaim and buffered_rmqueue()
 * @nodemask:       node restriction for the zonelist iteration
 * @order:          allocation order
 * @zonelist:       zones to scan
 * @high_zoneidx:   upper bound on zone index for the iteration
 * @alloc_flags:    ALLOC_* flags; select the watermark (ALLOC_WMARK_MASK),
 *                  cpuset enforcement (ALLOC_CPUSET) or no watermark check
 *                  at all (ALLOC_NO_WATERMARKS)
 * @preferred_zone: supplies classzone_idx and is passed to buffered_rmqueue()
 * @migratetype:    migrate type forwarded to buffered_rmqueue()
 *
 * Returns the first page successfully taken from a zone, or NULL if every
 * zone was skipped or failed.
 */
static struct page *
get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
		struct zonelist *zonelist, int high_zoneidx, int alloc_flags,
		struct zone *preferred_zone, int migratetype)
{
	struct zoneref *z;
	struct page *page = NULL;
	int classzone_idx;
	struct zone *zone;
	nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
	int zlc_active = 0;		/* set if using zonelist_cache */
	int did_zlc_setup = 0;		/* just call zlc_setup() one time */

	classzone_idx = zone_idx(preferred_zone);
zonelist_scan:
	/*
	 * Scan zonelist, looking for a zone with enough free.
	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
	 */
	for_each_zone_zonelist_nodemask(zone, z, zonelist,
						high_zoneidx, nodemask) {
		/* Once the zonelist cache is active, skip zones it rules out */
		if (NUMA_BUILD && zlc_active &&
			!zlc_zone_worth_trying(zonelist, z, allowednodes))
				continue;

		/* Zone not permitted by the cpuset: move on to the next zone */
		if ((alloc_flags & ALLOC_CPUSET) &&
			!cpuset_zone_allowed_softwall(zone, gfp_mask))
				goto try_next_zone;

		BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
		/* With ALLOC_NO_WATERMARKS we go straight to the allocation */
		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
			unsigned long mark;
			int ret;

			mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
			/*
			 * Watermark satisfied: allocate from this zone.
			 * Per the original annotation, the threshold is
			 * adjusted by ALLOC_HIGH / ALLOC_HARDER inside
			 * zone_watermark_ok().
			 */
			if (zone_watermark_ok(zone, order, mark,
				    classzone_idx, alloc_flags))
				goto try_this_zone;

			/*
			 * Zone reclaim is disabled: treat the zone as full so
			 * the zonelist cache can skip it on later scans.
			 */
			if (zone_reclaim_mode == 0)
				goto this_zone_full;

			/*
			 * Try to reclaim from this zone. Per the original
			 * annotation, ZONE_RECLAIM_NOSCAN comes back when the
			 * allocation may not wait.
			 */
			ret = zone_reclaim(zone, gfp_mask, order);
			switch (ret) {
			case ZONE_RECLAIM_NOSCAN:
				/* did not scan */
				goto try_next_zone;
			case ZONE_RECLAIM_FULL:
				/* scanned but unreclaimable */
				goto this_zone_full;
			default:
				/* did we reclaim enough */
				if (!zone_watermark_ok(zone, order, mark,
						classzone_idx, alloc_flags))
					goto this_zone_full;
			}
		}

try_this_zone:
		/* All checks passed: perform the actual allocation here */
		page = buffered_rmqueue(preferred_zone, zone, order,
						gfp_mask, migratetype);
		if (page)
			break;
this_zone_full:
		if (NUMA_BUILD)
			zlc_mark_zone_full(zonelist, z);
try_next_zone:
		if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
			/*
			 * we do zlc_setup after the first zone is tried but only
			 * if there are multiple nodes make it worthwhile
			 */
			allowednodes = zlc_setup(zonelist, alloc_flags);
			zlc_active = 1;
			did_zlc_setup = 1;
		}
	}

	if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
		/* Disable zlc cache for second zonelist scan */
		zlc_active = 0;
		goto zonelist_scan;
	}
	return page;
}
/*
 * Really, prep_compound_page() should be called from __rmqueue_bulk().  But
 * we cheat by calling it from here, in the order > 0 path.  Saves a branch
 * or two.
 *
 * buffered_rmqueue - take one block of 2^order pages out of @zone.
 *
 * @preferred_zone: zone passed to zone_statistics() for accounting
 * @zone:           zone to allocate from
 * @order:          allocation order; order 0 is served from the per-cpu
 *                  page lists, anything larger from __rmqueue() under
 *                  zone->lock
 * @gfp_flags:      allocation flags (__GFP_COLD and __GFP_NOFAIL are
 *                  inspected here; the rest go to prep_new_page())
 * @migratetype:    selects the per-cpu list / free list to allocate from
 *
 * Returns the allocated page, or NULL if no page could be obtained.
 */
static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
			struct zone *zone, int order, gfp_t gfp_flags,
			int migratetype)
{
	unsigned long flags;
	struct page *page;
	int cold = !!(gfp_flags & __GFP_COLD);

again:
	if (likely(order == 0)) {
		/* Single page: serve it from the per-cpu page (pcp) lists */
		struct per_cpu_pages *pcp;
		struct list_head *list;

		/* Interrupts stay off until local_irq_restore() below */
		local_irq_save(flags);
		pcp = &this_cpu_ptr(zone->pageset)->pcp;
		list = &pcp->lists[migratetype];
		if (list_empty(list)) {
			/*
			 * pcp list is empty: refill it with order-0 pages,
			 * requesting pcp->batch of them, and account for
			 * however many rmqueue_bulk() reports back.
			 */
			pcp->count += rmqueue_bulk(zone, 0,
					pcp->batch, list,
					migratetype, cold);
			if (unlikely(list_empty(list)))
				goto failed;
		}

		/* Cold requests take the list tail, hot requests the head */
		if (cold)
			page = list_entry(list->prev, struct page, lru);
		else
			page = list_entry(list->next, struct page, lru);

		/* Unlink the page and keep the pcp page counter in sync */
		list_del(&page->lru);
		pcp->count--;
	} else {
		if (unlikely(gfp_flags & __GFP_NOFAIL)) {
			/*
			 * __GFP_NOFAIL is not to be used in new code.
			 *
			 * All __GFP_NOFAIL callers should be fixed so that they
			 * properly detect and handle allocation failures.
			 *
			 * We most definitely don't want callers attempting to
			 * allocate greater than order-1 page units with
			 * __GFP_NOFAIL.
			 */
			WARN_ON_ONCE(order > 1);
		}
		spin_lock_irqsave(&zone->lock, flags);
		/*
		 * Allocate from the buddy free lists. Per the original
		 * annotation, __rmqueue() works in two steps:
		 *  1. __rmqueue_smallest(): take a block from the free list
		 *     of the requested migrate type and put the unused
		 *     remainder back on lower-order lists;
		 *  2. __rmqueue_fallback(): if that list has nothing, borrow
		 *     from other migrate types in a fixed fallback sequence.
		 */
		page = __rmqueue(zone, order, migratetype);
		/*
		 * Plain spin_unlock() on purpose: the saved irq state in
		 * 'flags' is restored later by local_irq_restore(), on both
		 * the success and the 'failed' path.
		 */
		spin_unlock(&zone->lock);
		if (!page)
			goto failed;
		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
	}

	/* Allocation accounting, done while interrupts are still disabled */
	__count_zone_vm_events(PGALLOC, zone, 1 << order);
	zone_statistics(preferred_zone, zone);
	local_irq_restore(flags);

	VM_BUG_ON(bad_range(zone, page));
	/*
	 * A non-zero return from prep_new_page() sends us back to allocate
	 * another page. NOTE(review): presumably it signals a page that
	 * failed sanity checks - confirm against prep_new_page().
	 */
	if (prep_new_page(page, order, gfp_flags))
		goto again;
	return page;

failed:
	local_irq_restore(flags);
	return NULL;
}
相关文章推荐
- 释放页到伙伴系统之free_one_page
- linux内核__get_free_page,kmalloc,vmalloc的区别,内核对内存的管理
- 关系管理系统:CustomerDaoimpl中添加用户分页显示getPageData()
- 堆管理算法中的Buddy System(伙伴系统)算法
- linux内存模型之buddy(伙伴)系统三从bootmem到buddy的过渡
- kmalloc kzalloc vmalloc malloc 和get_free_page()的区别
- [经典]Linux内核中get_free_page、kmalloc和vmalloc函数的区别(示例Module)
- Linux常用系统管理命令(top、free、kill、df)
- WSS项目管理系统Post get shell
- kmalloc vmalloc kzalloc malloc 和 get_free_page()【转】
- Linux-0.11内核源码分析系列:内存管理get_free_page()函数分析
- μCOS-II系统之时间管理函数OSTimeGet()
- μCOS-II系统之时间管理函数OSTimeGet()
- [转载]kmalloc vmalloc kcalloc kzalloc malloc 和 get_free_page()
- 伙伴系统分配器 - __alloc_pages
- linux内核内存管理学习之二(物理内存管理--伙伴系统)
- Linux内存管理之kmalloc 与 __get_free_page()
- get_free_page分配大块内存空间
- C语言构建WEB管理系统(三):CGI程序解析GET数据
- 伙伴宿舍管理系统(源码下载)