您的位置:首页 > 其它

3.2 Qemu Memory管理

2015-07-26 16:08 281 查看
3.2.1 Qemu内存管理结构

(1) KVM内存管理初始化

main(vl.c)==>configure_accelerator==>kvm_init(kvm_all.c)==> memory_listener_register(&kvm_memory_listener,NULL);

Qemu中可以注册多个listener, 用memory_listeners链表来维护

/*
 * Register a memory listener.
 *
 * The listener is linked into the global memory_listeners list so that the
 * list stays ordered by ascending priority; a listener whose priority equals
 * that of existing entries ends up after them.  It is then attached to both
 * address_space_memory and address_space_io via listener_add_address_space().
 *
 * @listener: listener to register; its address_space_filter is overwritten.
 * @filter:   value stored in listener->address_space_filter.
 */
void memory_listener_register(MemoryListener *listener, MemoryRegion *filter)
{
    MemoryListener *other = NULL;

    listener->address_space_filter = filter;
    if (QTAILQ_EMPTY(&memory_listeners)
        || listener->priority >= QTAILQ_LAST(&memory_listeners,
                                             memory_listeners)->priority) {
        /* Empty list, or not lower than the current last entry: append. */
        QTAILQ_INSERT_TAIL(&memory_listeners, listener, link);
    } else {
        /*
         * The last entry has a strictly higher priority, so the search
         * always stops on a valid element and 'other' is never NULL here.
         */
        QTAILQ_FOREACH(other, &memory_listeners, link) {
            if (listener->priority < other->priority) {
                break;
            }
        }
        QTAILQ_INSERT_BEFORE(other, listener, link);
    }
    listener_add_address_space(listener, &address_space_memory);
    listener_add_address_space(listener, &address_space_io);
}

static MemoryListenerkvm_memory_listener = {

.begin = kvm_begin,

.commit = kvm_commit,

.region_add = kvm_region_add,

.region_del = kvm_region_del,

.region_nop = kvm_region_nop,

.log_start = kvm_log_start,

.log_stop = kvm_log_stop,

.log_sync = kvm_log_sync,

.log_global_start = kvm_log_global_start,

.log_global_stop = kvm_log_global_stop,

.eventfd_add = kvm_eventfd_add,

.eventfd_del = kvm_eventfd_del,

.priority = 10,

};

kvm_region_add==>kvm_set_phys_mem(section, true);

kvm_region_del==>kvm_set_phys_mem(section, false);

log_global_xxx用于动态迁移,本章暂不讨论。

kvm_eventfd_add,kvm_eventfd_del用于eventfd的管理

(2) System Memory初始化

在Qemu初始化时会 main(vl.c)==>cpu_exec_init_all(exec.c)==>memory_map_init(exec.c)

/*
 * Create the two root memory regions and hook up the core listeners.
 *
 * Allocates and initialises the "system" region (size INT64_MAX) and the
 * "io" region (size 65536), installs them as the roots of the memory and
 * I/O address spaces, then registers core_memory_listener filtered on
 * system_memory and io_memory_listener filtered on system_io.
 */
static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    set_system_memory_map(system_memory);

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    set_system_io_map(system_io);

    memory_listener_register(&core_memory_listener, system_memory);
    memory_listener_register(&io_memory_listener, system_io);
}

qemu中系统内存用system_memory来管理,io内存用system_io来管理,io内存管理将在第5章分析。system_memory的定义为 static MemoryRegion *system_memory; MemoryRegion可以有子区域,而memory_listener负责处理内存区域的添加和移除。

set_system_memory_map(system_memory);用system_memory来初始化address_space_memory.

/*
 * Install @mr as the root region of address_space_memory.
 *
 * The assignment is wrapped in a memory-region transaction, so the
 * topology update happens in memory_region_transaction_commit().
 */
void set_system_memory_map(MemoryRegion *mr)
{
    memory_region_transaction_begin();

    address_space_memory.root = mr;

    memory_region_transaction_commit();
}

AddressSpace的定义如下:

/* One address space, described by a root region and its flattened view. */
struct AddressSpace {
    /* Root region; set_system_memory_map() assigns it for the memory space. */
    MemoryRegion *root;
    /* Current flattened view; replaced by address_space_update_topology(). */
    FlatView current_map;
    /* NOTE(review): presumably the number of entries in ioeventfds - confirm. */
    int ioeventfd_nb;
    MemoryRegionIoeventfd *ioeventfds;
};

(3) Memory Listener 管理

voidmemory_region_transaction_begin(void)

{

qemu_flush_coalesced_mmio_buffer();

++memory_region_transaction_depth;

}

qemu_flush_coalesced_mmio_buffer==>kvm_flush_coalesced_mmio_buffer(kvm_all.c)

voidmemory_region_transaction_commit(void)

{

--memory_region_transaction_depth;

if (!memory_region_transaction_depth) {

MEMORY_LISTENER_CALL_GLOBAL(begin,Forward);

if (address_space_memory.root) {

address_space_update_topology(&address_space_memory);

}

if (address_space_io.root) {

address_space_update_topology(&address_space_io);

}

MEMORY_LISTENER_CALL_GLOBAL(commit,Forward);

}

}

/*
 * Rebuild the flattened view of @as from its root region and notify
 * listeners about the differences.
 *
 * The old and new views are compared in two passes (last argument false,
 * then true), after which the new view becomes current, the old one is
 * destroyed and the ioeventfds of the address space are refreshed.
 */
static void address_space_update_topology(AddressSpace *as)
{
    FlatView previous = as->current_map;
    FlatView rebuilt = generate_memory_topology(as->root);

    /* First pass with the flag cleared, second pass with it set. */
    address_space_update_topology_pass(as, previous, rebuilt, false);
    address_space_update_topology_pass(as, previous, rebuilt, true);

    as->current_map = rebuilt;
    flatview_destroy(&previous);

    address_space_update_ioeventfds(as);
}

address_space_update_topology_pass==》 MEMORY_LISTENER_UPDATE_REGION

/*
 * Invoke listener callback @callback, in direction @dir, with a temporary
 * MemoryRegionSection built from flat range @fr of address space @as.
 */
#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback)            \
    MEMORY_LISTENER_CALL(callback, dir, (&(MemoryRegionSection) {       \
        .mr = (fr)->mr,                                                 \
        .address_space = (as)->root,                                    \
        .offset_within_region = (fr)->offset_in_region,                 \
        .size = int128_get64((fr)->addr.size),                          \
        .offset_within_address_space = int128_get64((fr)->addr.start),  \
        .readonly = (fr)->readonly,                                     \
    }))

MEMORY_LISTENER_CALL会从前到后或从后到前遍历memory_listeners,并调用相应方法如region_add, region_del等。调用region_add的示例如下:

MEMORY_LISTENER_UPDATE_REGION(frnew,as, Forward, region_add);

3.2.2 PC内存管理流程分析

(1)RAM初始化

pc_init1(hw\pc_piix.c)==》pc_memory_init。内存被分为两段:0 ~ 0xE000_0000,以及0xE000_0000以上的部分(从下面的代码可以看到,后一段被映射到0x100000000即4G以上的地址)。pc_memory_init(hw\pc.c)的相关代码如下:

{ 。。。。。。

MemoryRegion * ram = g_malloc(sizeof(*ram));

//分配整个内存区域

memory_region_init_ram(ram,"pc.ram",

below_4g_mem_size +above_4g_mem_size);

vmstate_register_ram_global(ram);

*ram_memory = ram;

ram_below_4g =g_malloc(sizeof(*ram_below_4g));

memory_region_init_alias(ram_below_4g,"ram-below-4g", ram,

0, below_4g_mem_size);

memory_region_add_subregion(system_memory,0, ram_below_4g);

if (above_4g_mem_size > 0) {

ram_above_4g =g_malloc(sizeof(*ram_above_4g));

memory_region_init_alias(ram_above_4g,"ram-above-4g", ram,

below_4g_mem_size, above_4g_mem_size);

memory_region_add_subregion(system_memory, 0x100000000ULL,

ram_above_4g);

}

。。。。。。

}

/*
 * Initialise @mr as a RAM region of @size bytes named @name.
 *
 * After the generic memory_region_init(), the region is flagged as RAM and
 * as terminating, gets the RAM destructor, and receives its ram_addr from
 * qemu_ram_alloc().
 */
void memory_region_init_ram(MemoryRegion *mr,
                            const char *name,
                            uint64_t size)
{
    memory_region_init(mr, name, size);
    mr->ram = true;
    mr->terminates = true;
    mr->destructor = memory_region_destructor_ram;
    mr->ram_addr = qemu_ram_alloc(size, mr);
}

mr->ram_addr =qemu_ram_alloc(size, mr); 分配HVA

qemu_ram_alloc==》qemu_ram_alloc_from_ptr

a) 向ram_list 加入一个RAMBlock 结构;同时扩大ram_list.phys_dirty用于记录脏页

b) ==》kvm_vmalloc==》qemu_vmalloc

qemu_vmalloc调用操作系统虚拟内存分配接口函数。

/*
 * Initialise @mr as an alias region of @size bytes named @name.
 *
 * The alias points at @orig, starting @offset bytes into it; both values
 * are simply recorded in the region after the generic initialisation.
 */
void memory_region_init_alias(MemoryRegion *mr,
                              const char *name,
                              MemoryRegion *orig,
                              target_phys_addr_t offset,
                              uint64_t size)
{
    memory_region_init(mr, name, size);

    /* Record the aliased region and the window start inside it. */
    mr->alias_offset = offset;
    mr->alias = orig;
}

memory_region_init_alias(ram_below_4g,"ram-below-4g", ram,0, below_4g_mem_size);

ram_below_4g->alias = ram; ram_below_4g->alias_offset = 0;

memory_region_add_subregion(system_memory,0, ram_below_4g);

//将ram_below_4g加入到system_memory的subregion中去

memory_region_add_subregion==>memory_region_add_subregion_common

/*
 * Attach @subregion to container @mr at @offset.
 *
 * The subregion must not already have a parent.  It is inserted into
 * mr->subregions in front of the first sibling whose priority is not
 * higher than its own, or at the tail if there is none.  The whole
 * operation runs inside a memory-region transaction, so listeners are
 * notified from memory_region_transaction_commit().
 */
static void memory_region_add_subregion_common(MemoryRegion *mr,
                                               target_phys_addr_t offset,
                                               MemoryRegion *subregion)
{
    MemoryRegion *other;

    memory_region_transaction_begin();

    assert(!subregion->parent);
    subregion->parent = mr;
    subregion->addr = offset;

    /* First walk over the siblings: collision check. */
    QTAILQ_FOREACH(other, &mr->subregions, subregions_link) {
        if (subregion->may_overlap || other->may_overlap) {
            /* Overlap is allowed for this pair: nothing to check. */
            continue;
        }
        /* ...... (rest of the check is elided in this excerpt) */
    }

    /* Second walk: find the insertion point by priority. */
    QTAILQ_FOREACH(other, &mr->subregions, subregions_link) {
        if (subregion->priority >= other->priority) {
            QTAILQ_INSERT_BEFORE(other, subregion, subregions_link);
            goto done;
        }
    }
    QTAILQ_INSERT_TAIL(&mr->subregions, subregion, subregions_link);
done:
    memory_region_transaction_commit();
}

由于此时core_memory_listener、kvm_memory_listener都已注册,memory_region_transaction_commit()将触发它们的region_add被调用。

(2) rom区域

pc_init1:

pci_memory = g_new(MemoryRegion, 1);

memory_region_init(pci_memory,"pci", INT64_MAX);

rom_memory = pci_memory;

pc_memory_init==> pc_system_firmware_init==》old_pc_system_rom_init

bios rom区域的建立:

memory_region_init_ram(bios,"pc.bios", bios_size);

vmstate_register_ram_global(bios);

memory_region_set_readonly(bios, true);

isa_bios = g_malloc(sizeof(*isa_bios));

memory_region_init_alias(isa_bios,"isa-bios", bios,

bios_size -isa_bios_size, isa_bios_size);

memory_region_add_subregion_overlap(rom_memory,

0x100000 - isa_bios_size,

isa_bios,

1);

memory_region_set_readonly(isa_bios, true);

/* map all the bios at the top of memory */

memory_region_add_subregion(rom_memory,

(uint32_t)(-bios_size),

bios);

(3) Ram RW VM-Exit处理

kvm_cpu_exec==》case KVM_EXIT_MMIO ==>cpu_physical_memory_rw 下面的示例为ram区域的写:

ram_addr_t addr1;

addr1 =memory_region_get_ram_addr(section->mr)

+memory_region_section_addr(section, addr);

/* RAM case */

ptr = qemu_get_ram_ptr(addr1);

memcpy(ptr, buf, l);

invalidate_and_set_dirty(addr1,l);

qemu_put_ram_ptr(ptr);

/*
 * Post-write bookkeeping for a RAM range starting at @addr of @length bytes.
 *
 * If the page at @addr is not already reported dirty, translated code
 * covering [addr, addr + length) is invalidated and every dirty flag
 * except CODE_DIRTY_FLAG is set for that page.
 */
static void invalidate_and_set_dirty(target_phys_addr_t addr,
                                     target_phys_addr_t length)
{
    if (!cpu_physical_memory_is_dirty(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
    }
}

将页标记为脏

/*
 * OR @dirty_flags into the dirty byte of the page containing @addr.
 *
 * When MIGRATION_DIRTY_FLAG is among the requested flags and the page is
 * not yet reported dirty for migration, ram_list.dirty_pages is bumped
 * first so the counter tracks newly dirtied pages.
 *
 * Returns the page's dirty byte after the update.
 */
static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
                                                      int dirty_flags)
{
    if ((dirty_flags & MIGRATION_DIRTY_FLAG) &&
        !cpu_physical_memory_get_dirty(addr, TARGET_PAGE_SIZE,
                                       MIGRATION_DIRTY_FLAG)) {
        ram_list.dirty_pages++;
    }
    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags;
}

(4) 其他

memory_region_transaction_commit==》address_space_update_topology==》address_space_update_topology_pass ==》

if (adding) {

MEMORY_LISTENER_UPDATE_REGION(frnew, as,Forward, region_nop);

if (frold->dirty_log_mask&& !frnew->dirty_log_mask) {

MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop);

} else if (frnew->dirty_log_mask&& !frold->dirty_log_mask) {

MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start);

}

}

当添加、移除或更新内存区域时会调用memory_region_transaction_commit。此时如果更新前后区域相同,则先对该区域调用region_nop;若旧区域开启了dirty log而新区域没有,则调用log_stop;反之若新区域开启了dirty log而旧区域没有,则调用log_start。log目前用于vga虚拟驱动。

3.2.3 Qemu到KVM内存管理接口分析

kvm_set_phys_mem用于设置内存, 该函数流程如下:

(1) start_addr = section->offset_within_address_space

ram_addr_t size = section->size;

根据物理起始地址和长度,在kvm_state中搜索已建立的KVMSlot *mem区域

/* QEMU-side record of one memory slot registered with KVM. */
typedef struct KVMSlot
{
    /* Start address; copied to guest_phys_addr by kvm_set_user_memory_region(). */
    target_phys_addr_t start_addr;
    /* Size; kvm_set_phys_mem() sets it to 0 before unregistering the slot. */
    ram_addr_t memory_size;
    /* Backing memory in QEMU; passed to the kernel as userspace_addr. */
    void *ram;
    /* Slot number handed to the kernel. */
    int slot;
    /* Slot flags, e.g. KVM_MEM_LOG_DIRTY_PAGES. */
    int flags;
} KVMSlot;

(2) 如果没找到,则退出循环并建立一个slot; add 为false时直接退出

mem = kvm_alloc_slot(s);

mem->memory_size = size;

mem->start_addr = start_addr;

mem->ram = ram;

mem->flags = kvm_mem_flags(s,log_dirty);

然后调用 err = kvm_set_user_memory_region(s, mem); 通知内核态建立内存区域

/*
 * Push one slot description to the kernel.
 *
 * Copies @slot into a struct kvm_userspace_memory_region and issues the
 * KVM_SET_USER_MEMORY_REGION vm ioctl.  KVM_MEM_LOG_DIRTY_PAGES is forced
 * on while s->migration_log is set.
 *
 * Returns the result of kvm_vm_ioctl().
 */
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)slot->ram;
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

(3) 如果找到,区域完全重合并且add==true,则调用

kvm_slot_dirty_pages_log_change(mem,log_dirty); 并返回。其中:

log_dirty =memory_region_is_logging(mr); //return mr->dirty_log_mask;

if (mem->flags &KVM_MEM_LOG_DIRTY_PAGES) {

kvm_physical_sync_dirty_bitmap(section);

}

当kvm_log_global_start时KVM_MEM_LOG_DIRTY_PAGES flag会被设置

(4) 如果找到,但不完全重合

a. 取消slot区域

old = *mem;

/* unregister the overlapping slot */

mem->memory_size = 0;

err = kvm_set_user_memory_region(s,mem);

b. 新建两个区域:

slot->start_addr 到 mr->start_addr

mr->start_addr 到 (slot->start_addr + slot->memory_size)

3.2.4 KVM内存虚拟化框架

(1) memslots

kvm_vm_ioctl ==> kvm_vm_ioctl_set_memory_region ==> __kvm_set_memory_region

内核态也维护了一个slots, struct kvm->memslots,其定义如下:

/* Kernel-side table of memory slots (struct kvm->memslots). */
struct kvm_memslots {
    u64 generation;
    struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
    /* The mapping table from slot id to the index in memslots[]. */
    short id_to_index[KVM_MEM_SLOTS_NUM];
};

/* Kernel-side description of one memory slot. */
struct kvm_memory_slot {
    gfn_t base_gfn; // first guest physical frame number of the slot
    unsigned long npages; // number of pages in the slot
    unsigned long *dirty_bitmap;
    struct kvm_arch_memory_slot arch;
    unsigned long userspace_addr; // start address in the userspace (QEMU) process, i.e. host virtual
    u32 flags;
    short id; // slot id; looked up via id_to_memslot()
};

内核态slot的管理策略是根据用户空间的slot_id一一对应的

slot =id_to_memslot(kvm->memslots, mem->slot); //根据用户态slot号得到内核slot结构

__kvm_set_memory_region流程如下:

a. 根据用户态slot号得到内核slot结构

b.根据slot中的值和要设置的值,决定要操作的类别:

/*
 * Kind of operation __kvm_set_memory_region() performs, decided from the
 * existing slot contents and the requested values.
 */
enum kvm_mr_change {
    KVM_MR_CREATE,
    KVM_MR_DELETE,
    KVM_MR_MOVE,
    KVM_MR_FLAGS_ONLY,
};

c. 根据b中的动作进行操作

i . KVM_MR_CREATE: kvm_arch_create_memslot

X86 arch layer memslot:该结构按页大小级别(共KVM_NR_PAGE_SIZES级,含大页)分别为gpa保存对应的信息

/* x86 architecture-specific part of a memory slot, indexed by page-size level. */
struct kvm_arch_memory_slot {
    unsigned long *rmap[KVM_NR_PAGE_SIZES];
    /*
     * Per-level info for the large-page sizes (one entry fewer than rmap).
     * NOTE(review): the original note said this "records how many times a
     * page was written" - confirm against struct kvm_lpage_info.
     */
    struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
};

ii KVM_MR_DELETE OR KVM_MR_MOVE:

1. 将原slot标记为无效

slot->flags|= KVM_MEMSLOT_INVALID;

old_memslots = install_new_memslots(kvm, slots, NULL);

kvm_iommu_unmap_pages(kvm, &old);

kvm_arch_flush_shadow_memslot 刷新影子页表3.4节分析

2. 安装新slot,对于delete而言会将新slot清零memset(&new.arch, 0, sizeof(new.arch));

iii r = kvm_arch_prepare_memory_region(kvm,&new, mem, change);

通过vm_mmap调用为hva分配空间

iv 删除要取消映射的区域

install_new_memslots(kvm, slots,&new);

kvm_arch_commit_memory_region(kvm,mem, &old, change); //vm_unmap

kvm_free_physmem_slot(kvm,&old, &new);

kfree(old_memslots);

v KVM_MR_CREATE OR KVM_MR_MOVE:

kvm_iommu_map_pages(kvm, &new);// 在第7章分析
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: