Linux内核源代码情景分析笔记---对照4.2.5内核 续
2015-11-08 14:25
519 查看
/* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most * RISC architectures). The early dirtying is also good on the i386. * * There is also a hook called "update_mmu_cache()" that architectures * with external mmu caches can use to update those (ie the Sparc or * PowerPC hashed page tables that act as extended TLBs). * * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. * We return with pte unmapped and unlocked. * * The mmap_sem may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). */ static int handle_pte_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *pte, pmd_t *pmd, unsigned int flags) { pte_t entry; spinlock_t *ptl; /* * some architectures can have larger ptes than wordsize, * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and CONFIG_32BIT=y, * so READ_ONCE or ACCESS_ONCE cannot guarantee atomic accesses. * The code below just needs a consistent view for the ifs and * we later double check anyway with the ptl lock held. So here * a barrier will do. */ entry = *pte; barrier(); if (!pte_present(entry)) { if (pte_none(entry)) { if (vma->vm_ops) return do_fault(mm, vma, address, pte, pmd, flags, entry); return do_anonymous_page(mm, vma, address, pte, pmd, flags); } return do_swap_page(mm, vma, address, pte, pmd, flags, entry); } if (pte_protnone(entry)) return do_numa_page(mm, vma, address, entry, pte, pmd); ptl = pte_lockptr(mm, pmd); spin_lock(ptl); if (unlikely(!pte_same(*pte, entry))) goto unlock; if (flags & FAULT_FLAG_WRITE) { if (!pte_write(entry)) return do_wp_page(mm, vma, address, pte, pmd, ptl, entry); entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) { update_mmu_cache(vma, address, pte); } else { /* * This is needed only for protection faults but the arch code * is not yet telling us if this is a protection fault or not. * This still avoids useless tlb flushes for .text page faults * with threads. */ if (flags & FAULT_FLAG_WRITE) flush_tlb_fix_spurious_fault(vma, address); } unlock: pte_unmap_unlock(pte, ptl); return 0; }
关于2.4的do_no_page复杂了好多,判断项也多了不少。。/mm/memory.c
/* * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *page_table, pmd_t *pmd, unsigned int flags) { struct mem_cgroup *memcg; struct page *page; spinlock_t *ptl; pte_t entry; pte_unmap(page_table); /* File mapping without ->vm_ops ? */ if (vma->vm_flags & VM_SHARED) return VM_FAULT_SIGBUS; /* Check if we need to add a guard page to the stack */ if (check_stack_guard_page(vma, address) < 0) return VM_FAULT_SIGSEGV; /* Use the zero-page for reads */ if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) { entry = pte_mkspecial(pfn_pte(my_zero_pfn(address), vma->vm_page_prot)); page_table = pte_offset_map_lock(mm, pmd, address, &ptl); if (!pte_none(*page_table)) goto unlock; goto setpte; } /* Allocate our own private page. */ if (unlikely(anon_vma_prepare(vma))) goto oom; page = alloc_zeroed_user_highpage_movable(vma, address); if (!page) goto oom; if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) goto oom_free_page; /* * The memory barrier inside __SetPageUptodate makes sure that * preceeding stores to the page contents become visible before * the set_pte_at() write. */ __SetPageUptodate(page); entry = mk_pte(page, vma->vm_page_prot); if (vma->vm_flags & VM_WRITE) entry = pte_mkwrite(pte_mkdirty(entry)); page_table = pte_offset_map_lock(mm, pmd, address, &ptl); if (!pte_none(*page_table)) goto release; inc_mm_counter_fast(mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address); mem_cgroup_commit_charge(page, memcg, false); lru_cache_add_active_or_unevictable(page, vma); setpte: set_pte_at(mm, address, page_table, entry); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, page_table); unlock: pte_unmap_unlock(page_table, ptl); return 0; release: mem_cgroup_cancel_charge(page, memcg); page_cache_release(page); goto unlock; oom_free_page: page_cache_release(page); oom: return VM_FAULT_OOM; }
do_anonymous_page()
/* * The in-memory structure used to track swap areas. */ struct swap_info_struct { unsigned long flags; /* SWP_USED etc: see above */ signed short prio; /* swap priority of this type */ struct plist_node list; /* entry in swap_active_head */ struct plist_node avail_list; /* entry in swap_avail_head */ signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ struct swap_cluster_info free_cluster_head; /* free cluster list head */ struct swap_cluster_info free_cluster_tail; /* free cluster list tail */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ unsigned int inuse_pages; /* number of those currently in use */ unsigned int cluster_next; /* likely index for next allocation */ unsigned int cluster_nr; /* countdown to next cluster search */ struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ struct swap_extent *curr_swap_extent; struct swap_extent first_swap_extent; struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ #ifdef CONFIG_FRONTSWAP unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ atomic_t frontswap_pages; /* frontswap pages in-use counter */ #endif spinlock_t lock; /* * protect map scan related fields like * swap_map, lowest_bit, highest_bit, * inuse_pages, cluster_next, * cluster_nr, lowest_alloc, * highest_alloc, free/discard cluster * list. other fields are only changed * at swapon/swapoff, so are protected * by swap_lock. changing flags need * hold this lock and swap_lock. If * both locks need hold, hold swap_lock * first. */ struct work_struct discard_work; /* discard worker */ struct swap_cluster_info discard_cluster_head; /* list head of discard clusters */ struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */ };
swap_info_struct /linux/swap.h
int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) { int error; error = radix_tree_maybe_preload(gfp_mask); if (!error) { error = __add_to_swap_cache(page, entry); radix_tree_preload_end(); } return error; }
add_to_swap_cache /mm/swap_state.c
/* * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space, * but sets SwapCache flag and private instead of mapping and index. */ int __add_to_swap_cache(struct page *page, swp_entry_t entry) { int error; struct address_space *address_space; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapCache(page), page); VM_BUG_ON_PAGE(!PageSwapBacked(page), page); page_cache_get(page); SetPageSwapCache(page); set_page_private(page, entry.val); address_space = swap_address_space(entry); spin_lock_irq(&address_space->tree_lock); error = radix_tree_insert(&address_space->page_tree, entry.val, page); if (likely(!error)) { address_space->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(add_total); } spin_unlock_irq(&address_space->tree_lock); if (unlikely(error)) { /* * Only the context which have set SWAP_HAS_CACHE flag * would call add_to_swap_cache(). * So add_to_swap_cache() doesn't returns -EEXIST. */ VM_BUG_ON(error == -EEXIST); set_page_private(page, 0UL); ClearPageSwapCache(page); page_cache_release(page); } return error; }
相关文章推荐
- Linux socket 初步
- linux lsof详解
- linux 文件权限
- Linux 执行数学运算
- 10 篇对初学者和专家都有用的 Linux 命令教程
- Linux 与 Windows 对UNICODE 的处理方式
- Ubuntu12.04下QQ完美走起啊!走起啊!有木有啊!
- 解決Linux下Android开发真机调试设备不被识别问题
- 运维入门
- 运维提升
- Linux 自检和 SystemTap
- Ubuntu Linux使用体验
- c语言实现hashmap(转载)
- Linux 信号signal处理机制
- linux下mysql添加用户
- Scientific Linux 5.5 图形安装教程
- 基于 Linux 集群环境上 GPFS 的问题诊断
- 谁是桌面王者?Win PK Linux三大镇山之宝