您的位置:首页 > 其它

Kernel启动流程源码解析 6 setup_arch

2016-10-23 20:09 483 查看
一 setup_arch

setup_arch顾名思义就是架构相关的初始化函数,这里选取arm64结构进行分析。

1.0 setup_arch

定义在arch/arm64/kernel/setup.c中

/*
 * setup_arch - arm64 architecture-specific early initialisation.
 * @cmdline_p: out-parameter used by start_kernel() to receive a pointer
 *             to the boot command line.
 *
 * Called once from start_kernel(); each step below depends on the state
 * established by the previous ones, so the order is significant.
 */
void __init setup_arch(char **cmdline_p)
{
    setup_processor(); // detect the processor type and initialise processor-specific low-level state

    setup_machine_fdt(__fdt_pointer); // validate the DTB passed by the bootloader and record device-tree globals
    init_mm.start_code = (unsigned long) _text; // virtual start address of the kernel text section
    init_mm.end_code = (unsigned long) _etext; // virtual end address of the kernel text section
    init_mm.end_data = (unsigned long) _edata; // virtual end address of the kernel data section
    init_mm.brk = (unsigned long) _end; // brk points at the virtual end of the whole kernel image

    *cmdline_p = boot_command_line; // hand boot_command_line back to start_kernel() through its local cmdline_p

    parse_early_param(); // process parameters registered with early_param() that must run this early

    arm64_memblock_init(); // set up memblock, the boot-stage physical memory allocator

    paging_init(); // initialise the paging machinery / kernel page tables
    request_standard_resources(); // claim iomem resources for the kernel text and data segments

    unflatten_device_tree(); // expand the flat DTB into a tree of struct device_node

    psci_init(); // Power State Coordination Interface; related to secondary-CPU boot, not used yet here

    // Record the boot CPU's hardware id (MPIDR affinity bits) as logical CPU 0.
    cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
    cpu_read_bootcpu_ops(); // look up the enable-method for cpu0
#ifdef CONFIG_SMP
    smp_init_cpus(); // enumerate the CPUs other than cpu0 and mark them possible
    smp_build_mpidr_hash(); // build the MPIDR (cpu id) hash
#endif

#ifdef CONFIG_VT // virtual terminal
#if defined(CONFIG_VGA_CONSOLE)
    conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
    conswitchp = &dummy_con;
#endif
#endif
}

1.1 setup_processor

定义在arch/arm64/kernel/setup.c中

/*
 * Identify the boot CPU and publish its name. Hangs the boot if the
 * CPUID does not match any entry in the processor table.
 */
static void __init setup_processor(void)
{
    /*
     * locate processor in the list of supported processor
     * types. The linker builds this table for us from the
     * entries in arch/arm/mm/proc.S
     */
    struct cpu_info *info = lookup_processor_type(read_cpuid_id());

    if (!info) {
        printk("CPU configuration botched (ID %08x), unable to continue.\n",
               read_cpuid_id());
        /* No usable CPU description: spin forever. */
        while (1)
            ;
    }

    cpu_name = info->cpu_name;

    printk("CPU: %s [%08x] revision %d\n",
           cpu_name, read_cpuid_id(), read_cpuid_id() & 15);

    sprintf(init_utsname()->machine, "aarch64");
    elf_hwcap = 0;
}

/*
 * Table of supported processors, matched against the CPUID value by
 * lookup_processor_type(). The single mask/value pair below matches any
 * AArch64 implementation; the empty entry terminates the table.
 */
struct cpu_info cpu_table[] = {
    {
        .cpu_id_val = 0x000f0000,
        .cpu_id_mask = 0x000f0000,
        .cpu_name = "AArch64 Processor",
        .cpu_setup = __cpu_setup, // low-level per-CPU setup routine
    },
    { /* Empty */ }, // sentinel entry
};

1.2 setup_machine_fdt

定义在arch/arm64/kernel/setup.c中

/*
 * setup_machine_fdt - sanity-check the DTB handed over by the bootloader
 * and extract the early boot information from it.
 * @dt_phys: physical address of the device tree blob.
 *
 * Hangs the boot (spinning on cpu_relax()) if the DTB pointer is NULL or
 * the blob's magic does not match OF_DT_HEADER.
 */
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
    struct boot_param_header *devtree;
    unsigned long dt_root;

    /* Check we have a non-NULL DT pointer */
    if (!dt_phys) {
        early_print("\n"
            "Error: NULL or invalid device tree blob\n"
            "The dtb must be 8-byte aligned and passed in the first 512MB of memory\n"
            "\nPlease check your bootloader.\n");

        /* Cannot continue without a DTB. */
        while (true)
            cpu_relax();

    }

    devtree = phys_to_virt(dt_phys);

    /* Check device tree validity */
    if (be32_to_cpu(devtree->magic) != OF_DT_HEADER) {
        early_print("\n"
            "Error: invalid device tree blob at physical address 0x%p (virtual address 0x%p)\n"
            "Expected 0x%x, found 0x%x\n"
            "\nPlease check your bootloader.\n",
            dt_phys, devtree, OF_DT_HEADER,
            be32_to_cpu(devtree->magic));

        while (true)
            cpu_relax();
    }

    initial_boot_params = devtree; // global initial_boot_params keeps the DTB's kernel virtual address
    dt_root = of_get_flat_dt_root(); // handle of the device tree root node

    /* Machine name: prefer "model", fall back to "compatible". */
    machine_name = of_get_flat_dt_prop(dt_root, "model", NULL);
    if (!machine_name)
        machine_name = of_get_flat_dt_prop(dt_root, "compatible", NULL);
    if (!machine_name)
        machine_name = "<unknown>";
    // Example, Qualcomm msm8996 dts:
    //   model = "Qualcomm MSM 8996";
    //   compatible = "qcom,msm8996";
    pr_info("Machine: %s\n", machine_name);

    /* Retrieve various information from the /chosen node */
    of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line); // save the runtime bootargs into boot_command_line
    /* Initialize {size,address}-cells info */
    of_scan_flat_dt(early_init_dt_scan_root, NULL); // stored in the dt_root_size_cells / dt_root_addr_cells globals
    /* Setup memory, calling early_init_dt_add_memory_arch */
    of_scan_flat_dt(early_init_dt_scan_memory, NULL); // record the DTB memory nodes in the global meminfo
}

1.3 parse_early_param

提取cmdline中需要'早期'处理的启动参数(由early_param宏定义)。
定义在init/main.c中(parse_early_param和do_early_param均为通用代码,并非架构相关代码)

/*
 * Parse the boot command line for "early" parameters (registered with
 * the early_param() macro). Idempotent: only the first call does work.
 */
void __init parse_early_param(void)
{
    static __initdata int done = 0;
    static __initdata char cmdline_copy[COMMAND_LINE_SIZE];

    if (!done) {
        /* Work on a copy; all options fall through to do_early_param. */
        strlcpy(cmdline_copy, boot_command_line, COMMAND_LINE_SIZE);
        parse_early_options(cmdline_copy);
        done = 1;
    }
}

/*
 * Callback invoked for each param=val pair on the command line: run the
 * handler of every matching early parameter. "console" is additionally
 * routed to the "earlycon" handler.
 */
static int __init do_early_param(char *param, char *val, const char *unused)
{
    const struct obs_kernel_param *entry;

    for (entry = __setup_start; entry < __setup_end; entry++) {
        int matches_early = entry->early && parameq(param, entry->str);
        int console_to_earlycon = strcmp(param, "console") == 0 &&
                                  strcmp(entry->str, "earlycon") == 0;

        if (!matches_early && !console_to_earlycon)
            continue;

        if (entry->setup_func(val) != 0)
            pr_warn("Malformed early option '%s'\n", param);
    }

    /* We accept everything at this stage. */
    return 0;
}

// early_param(str, fn)
// str是参数名,fn是处理函数

1.4 arm64_memblock_init

定义在arch/arm64/mm/init.c中

/*
 * arm64_memblock_init - initialise the memblock boot allocator.
 *
 * Reserves every region that is already in use before the allocator is
 * opened up: the kernel image, the initrd, the boot page tables, the DTB
 * itself and the DTB's memory reserve map.
 */
void __init arm64_memblock_init(void)
{
    u64 *reserve_map, base, size;

    /* Register the kernel text, kernel data and initrd with memblock */
    memblock_reserve(__pa(_text), _end - _text);
#ifdef CONFIG_BLK_DEV_INITRD
    if (phys_initrd_size) {
        memblock_reserve(phys_initrd_start, phys_initrd_size);

        /* Now convert initrd to virtual addresses */
        initrd_start = __phys_to_virt(phys_initrd_start);
        initrd_end = initrd_start + phys_initrd_size;
    }
#endif

    /*
     * Reserve the page tables. These are already in use,
     * and can only be in node 0.
     */
    memblock_reserve(__pa(swapper_pg_dir), SWAPPER_DIR_SIZE);
    memblock_reserve(__pa(idmap_pg_dir), IDMAP_DIR_SIZE);

    /* Reserve the dtb region */
    memblock_reserve(virt_to_phys(initial_boot_params),
        be32_to_cpu(initial_boot_params->totalsize));

    /*
     * Process the reserve map. This will probably overlap the initrd
     * and dtb locations which are already reserved, but overlapping
     * doesn't hurt anything
     */
    reserve_map = ((void*)initial_boot_params) +
        be32_to_cpu(initial_boot_params->off_mem_rsvmap);
    while (1) {
        /* Each entry is a big-endian (base, size) pair; size 0 terminates. */
        base = be64_to_cpup(reserve_map++);
        size = be64_to_cpup(reserve_map++);
        if (!size)
            break;
        memblock_reserve(base, size);
    }

    dma_contiguous_reserve(0); // carve out the contiguous DMA (CMA) area, if configured

    memblock_allow_resize(); // memblock's internal arrays may be resized from now on
    memblock_dump_all(); // dumps memblock state when debugging is enabled
}

1.5 paging_init

定义在arch/arm64/mm/mmu.c中

/*
 * paging_init - set up the kernel page tables and finalise the MMU state
 * for the boot CPU, then allocate the shared zero page.
 */
void __init paging_init(void)
{
    void *zero_page;

    /*
     * Maximum PGDIR_SIZE addressable via the initial direct kernel
     * mapping in swapper_pg_dir.
     */
    memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE);

    init_mem_pgprot(); // initialise the page protection attributes used below
    map_mem(); // create the kernel mappings for all memblock memory

    /*
     * Finally flush the caches and tlb to ensure that we're in a
     * consistent state.
     */
    flush_cache_all();
    flush_tlb_all();

    /* allocate the zero page. */
    zero_page = early_alloc(PAGE_SIZE);

    bootmem_init();

    empty_zero_page = virt_to_page(zero_page);

    /*
     * TTBR0 is only used for the identity mapping at this stage. Make it
     * point to zero page to avoid speculatively fetching new entries.
     */
    cpu_set_reserved_ttbr0();
    flush_tlb_all();
}

1.6 request_standard_resources

定义在arch/arm64/kernel/setup.c中

/*
 * request_standard_resources - register "System RAM" iomem resources for
 * every memblock memory region, and nest the kernel code/data resources
 * inside the region that contains them.
 */
static void __init request_standard_resources(void)
{
    struct memblock_region *region;
    struct resource *res;

    /* Physical extents of the kernel text and data sections. */
    kernel_code.start = virt_to_phys(_text);
    kernel_code.end = virt_to_phys(_etext - 1);
    kernel_data.start = virt_to_phys(_sdata);
    kernel_data.end = virt_to_phys(_end - 1);

    for_each_memblock(memory, region) {
        res = alloc_bootmem_low(sizeof(*res));
        res->name = "System RAM";
        res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
        res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
        res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;

        request_resource(&iomem_resource, res);

        /* Attach kernel code/data as children of the covering RAM region. */
        if (kernel_code.start >= res->start &&
            kernel_code.end <= res->end)
            request_resource(res, &kernel_code);
        if (kernel_data.start >= res->start &&
            kernel_data.end <= res->end)
            request_resource(res, &kernel_data);
    }
}

1.7 unflatten_device_tree

定义在drivers/of/fdt.c中

/*
 * unflatten_device_tree - expand the flat DTB (initial_boot_params) into
 * the kernel's struct device_node tree rooted at of_allnodes.
 */
void __init unflatten_device_tree(void)
{
    __unflatten_device_tree(initial_boot_params, &of_allnodes,
        early_init_dt_alloc_memory_arch);

    /* Get pointer to "/chosen" and "/aliases" nodes for use everywhere */
    of_alias_scan(early_init_dt_alloc_memory_arch);
}

/*
 * __unflatten_device_tree - convert a flat DTB into device_node form.
 * @blob:     the flat device tree blob
 * @mynodes:  receives the head of the resulting node list
 * @dt_alloc: allocator used for the unflattened tree
 *
 * unflatten_dt_node() is run twice: a first pass to compute the required
 * size, then a second pass that actually builds the device node tree in
 * the freshly allocated buffer.
 */
static void __unflatten_device_tree(struct boot_param_header *blob,
    struct device_node **mynodes,
    void * (*dt_alloc)(u64 size, u64 align))
{
    unsigned long start, mem, size;
    struct device_node **allnextp = mynodes;

    pr_debug(" -> unflatten_device_tree()\n");

    if (!blob) {
        pr_debug("No device tree pointer\n");
        return;
    }

    pr_debug("Unflattening device tree:\n");
    pr_debug("magic: %08x\n", be32_to_cpu(blob->magic));
    pr_debug("size: %08x\n", be32_to_cpu(blob->totalsize));
    pr_debug("version: %08x\n", be32_to_cpu(blob->version));

    if (be32_to_cpu(blob->magic) != OF_DT_HEADER) {
        pr_err("Invalid device tree blob header\n");
        return;
    }

    /* First pass, scan for size */
    start = ((unsigned long)blob) +
        be32_to_cpu(blob->off_dt_struct);
    size = unflatten_dt_node(blob, 0, &start, NULL, NULL, 0);
    size = (size | 3) + 1; // round up to a multiple of 4 bytes

    pr_debug(" size is %lx, allocating...\n", size);

    /* Allocate memory for the expanded device tree */
    mem = (unsigned long)
        dt_alloc(size + 4, __alignof__(struct device_node)); // +4 for the guard word below

    memset((void *)mem, 0, size);

    /* Guard word past the end, checked after the second pass. */
    ((__be32 *)mem)[size / 4] = cpu_to_be32(0xdeadbeef);

    pr_debug(" unflattening %lx...\n", mem);

    /* Second pass, do actual unflattening */
    start = ((unsigned long)blob) +
        be32_to_cpu(blob->off_dt_struct);
    unflatten_dt_node(blob, mem, &start, NULL, &allnextp, 0);
    if (be32_to_cpup((__be32 *)start) != OF_DT_END)
        pr_warning("Weird tag at end of tree: %08x\n", *((u32 *)start));
    if (be32_to_cpu(((__be32 *)mem)[size / 4]) != 0xdeadbeef)
        pr_warning("End of tree marker overwritten: %08x\n",
            be32_to_cpu(((__be32 *)mem)[size / 4]));
    *allnextp = NULL; // terminate the allnodes list

    pr_debug(" <- unflatten_device_tree()\n");
}

// 第一次unflatten_dt_node获取size,第二次构建device node tree
//

1.8 psci_init

定义在arch/arm64/kernel/psci.c中

/*
 * Probe the PSCI (Power State Coordination Interface) node in the device
 * tree: pick the conduit ("hvc" or "smc") and record the firmware
 * function IDs for the operations the DT advertises.
 *
 * Returns 0 on success, -ENODEV without a PSCI node, or a negative errno
 * for a malformed node.
 */
int __init psci_init(void)
{
    struct device_node *node;
    const char *method;
    u32 fn_id;
    int err = 0;

    node = of_find_matching_node(NULL, psci_of_match);
    if (!node)
        return -ENODEV;

    pr_info("probing function IDs from device-tree\n");

    if (of_property_read_string(node, "method", &method)) {
        pr_warning("missing \"method\" property\n");
        err = -ENXIO;
        goto out_put_node;
    }

    /* Select the conduit instruction used to enter the firmware. */
    if (strcmp(method, "hvc") == 0) {
        invoke_psci_fn = __invoke_psci_fn_hvc;
    } else if (strcmp(method, "smc") == 0) {
        invoke_psci_fn = __invoke_psci_fn_smc;
    } else {
        pr_warning("invalid \"method\" property: %s\n", method);
        err = -EINVAL;
        goto out_put_node;
    }

    /* Each optional property carries the firmware function ID for one op. */
    if (of_property_read_u32(node, "cpu_suspend", &fn_id) == 0) {
        psci_function_id[PSCI_FN_CPU_SUSPEND] = fn_id;
        psci_ops.cpu_suspend = psci_cpu_suspend;
    }

    if (of_property_read_u32(node, "cpu_off", &fn_id) == 0) {
        psci_function_id[PSCI_FN_CPU_OFF] = fn_id;
        psci_ops.cpu_off = psci_cpu_off;
    }

    if (of_property_read_u32(node, "cpu_on", &fn_id) == 0) {
        psci_function_id[PSCI_FN_CPU_ON] = fn_id;
        psci_ops.cpu_on = psci_cpu_on;
    }

    if (of_property_read_u32(node, "migrate", &fn_id) == 0) {
        psci_function_id[PSCI_FN_MIGRATE] = fn_id;
        psci_ops.migrate = psci_migrate;
    }

out_put_node:
    of_node_put(node);
    return err;
}

1.9 cpu_read_bootcpu_ops

定义在arch/arm64/kernel/cpu_ops.c中

/*
 * cpu_read_bootcpu_ops - find the DT cpu node whose "reg" matches the
 * boot CPU's MPIDR (cpu_logical_map(0)) and read its enable-method via
 * cpu_read_ops() for logical CPU 0.
 */
void __init cpu_read_bootcpu_ops(void)
{
    struct device_node *dn = NULL;
    u64 mpidr = cpu_logical_map(0);

    /* Iterate all device_type = "cpu" nodes. */
    while ((dn = of_find_node_by_type(dn, "cpu"))) {
        u64 hwid;
        const __be32 *prop;

        prop = of_get_property(dn, "reg", NULL);
        if (!prop)
            continue; // nodes without "reg" cannot be matched

        hwid = of_read_number(prop, of_n_addr_cells(dn));
        if (hwid == mpidr) {
            cpu_read_ops(dn, 0);
            of_node_put(dn);
            return;
        }
    }
}

1.10 smp_init_cpus

定义在arch/arm64/kernel/smp.c中

/*
 * smp_init_cpus - enumerate cpu nodes from the device tree, validate
 * their MPIDR values, assign logical CPU ids (the boot CPU is always
 * logical id 0) and mark every validated CPU as possible.
 */
void __init smp_init_cpus(void)
{
    struct device_node *dn = NULL;
    unsigned int i, cpu = 1; // logical id 0 is reserved for the boot CPU
    bool bootcpu_valid = false;

    while ((dn = of_find_node_by_type(dn, "cpu"))) {
        const u32 *cell;
        u64 hwid;

        /*
         * A cpu node with missing "reg" property is
         * considered invalid to build a cpu_logical_map
         * entry.
         */
        cell = of_get_property(dn, "reg", NULL);
        if (!cell) {
            pr_err("%s: missing reg property\n", dn->full_name);
            goto next;
        }
        hwid = of_read_number(cell, of_n_addr_cells(dn));

        /*
         * Non affinity bits must be set to 0 in the DT
         */
        if (hwid & ~MPIDR_HWID_BITMASK) {
            pr_err("%s: invalid reg property\n", dn->full_name);
            goto next;
        }

        /*
         * Duplicate MPIDRs are a recipe for disaster. Scan
         * all initialized entries and check for
         * duplicates. If any is found just ignore the cpu.
         * cpu_logical_map was initialized to INVALID_HWID to
         * avoid matching valid MPIDR values.
         */
        for (i = 1; (i < cpu) && (i < NR_CPUS); i++) {
            if (cpu_logical_map(i) == hwid) {
                pr_err("%s: duplicate cpu reg properties in the DT\n",
                    dn->full_name);
                goto next;
            }
        }

        /*
         * The numbering scheme requires that the boot CPU
         * must be assigned logical id 0. Record it so that
         * the logical map built from DT is validated and can
         * be used.
         */
        if (hwid == cpu_logical_map(0)) {
            if (bootcpu_valid) {
                pr_err("%s: duplicate boot cpu reg property in DT\n",
                    dn->full_name);
                goto next;
            }

            bootcpu_valid = true;

            /*
             * cpu_logical_map has already been
             * initialized and the boot cpu doesn't need
             * the enable-method so continue without
             * incrementing cpu.
             */
            continue;
        }

        if (cpu >= NR_CPUS)
            goto next;

        if (cpu_read_ops(dn, cpu) != 0)
            goto next;

        if (cpu_ops[cpu]->cpu_init(dn, cpu))
            goto next;

        pr_debug("cpu logical map 0x%llx\n", hwid);
        cpu_logical_map(cpu) = hwid;
    // NOTE: cpu advances even for rejected nodes, so the check below
    // counts every (non-boot) cpu node seen, valid or not.
next:
        cpu++;
    }

    /* sanity check */
    if (cpu > NR_CPUS)
        pr_warning("no. of cores (%d) greater than configured maximum of %d - clipping\n",
            cpu, NR_CPUS);

    if (!bootcpu_valid) {
        pr_err("DT missing boot CPU MPIDR, not enabling secondaries\n");
        return;
    }

    /*
     * All the cpus that made it to the cpu_logical_map have been
     * validated so set them as possible cpus.
     */
    for (i = 0; i < NR_CPUS; i++)
        if (cpu_logical_map(i) != INVALID_HWID)
            set_cpu_possible(i, true);
}

1.11 smp_build_mpidr_hash

定义在arch/arm64/kernel/setup.c中

/*
 * smp_build_mpidr_hash - derive a compact, collision-free hash over the
 * MPIDR values of all possible CPUs, stored in the global mpidr_hash.
 * Used to map a hardware CPU id to a dense index (e.g. on resume paths).
 */
static void __init smp_build_mpidr_hash(void)
{
    u32 i, affinity, fs[4], bits[4], ls;
    u64 mask = 0;
    /*
     * Pre-scan the list of MPIDRS and filter out bits that do
     * not contribute to affinity levels, ie they never toggle.
     */
    for_each_possible_cpu(i)
        mask |= (cpu_logical_map(i) ^ cpu_logical_map(0));
    pr_debug("mask of set bits %#llx\n", mask);
    /*
     * Find and stash the last and first bit set at all affinity levels to
     * check how many bits are required to represent them.
     */
    for (i = 0; i < 4; i++) {
        affinity = MPIDR_AFFINITY_LEVEL(mask, i);
        /*
         * Find the MSB bit and LSB bits position
         * to determine how many bits are required
         * to express the affinity level.
         */
        ls = fls(affinity);
        fs[i] = affinity ? ffs(affinity) - 1 : 0;
        bits[i] = ls - fs[i];
    }
    /*
     * An index can be created from the MPIDR_EL1 by isolating the
     * significant bits at each affinity level and by shifting
     * them in order to compress the 32 bits values space to a
     * compressed set of values. This is equivalent to hashing
     * the MPIDR_EL1 through shifting and ORing. It is a collision free
     * hash though not minimal since some levels might contain a number
     * of CPUs that is not an exact power of 2 and their bit
     * representation might contain holes, eg MPIDR_EL1[7:0] = {0x2, 0x80}.
     */
    mpidr_hash.shift_aff[0] = MPIDR_LEVEL_SHIFT(0) + fs[0];
    mpidr_hash.shift_aff[1] = MPIDR_LEVEL_SHIFT(1) + fs[1] - bits[0];
    mpidr_hash.shift_aff[2] = MPIDR_LEVEL_SHIFT(2) + fs[2] -
        (bits[1] + bits[0]);
    mpidr_hash.shift_aff[3] = MPIDR_LEVEL_SHIFT(3) +
        fs[3] - (bits[2] + bits[1] + bits[0]);
    mpidr_hash.mask = mask;
    mpidr_hash.bits = bits[3] + bits[2] + bits[1] + bits[0];
    pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] aff3[%u] mask[%#llx] bits[%u]\n",
        mpidr_hash.shift_aff[0],
        mpidr_hash.shift_aff[1],
        mpidr_hash.shift_aff[2],
        mpidr_hash.shift_aff[3],
        mpidr_hash.mask,
        mpidr_hash.bits);
    /*
     * 4x is an arbitrary value used to warn on a hash table much bigger
     * than expected on most systems.
     */
    if (mpidr_hash_size() > 4 * num_possible_cpus())
        pr_warn("Large number of MPIDR hash buckets detected\n");
    // Flush so the hash is visible to CPUs resuming with caches off.
    __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: