您的位置:首页 > 其它

kernel crash 发生后的那些事(四)

2013-07-08 18:51 183 查看
Oops 信息打印之后,还有 crash dump 和系统重启的过程。本例采用系统重启的方式,重启后在 U-Boot 中保存 core dump。

die-> crash_kexec

kernel/kexec.c:

/*
 * crash_kexec() - save crash state, then boot the crash kernel or soft-reset.
 * @regs: register state handed to crash_setup_regs() to build the snapshot
 *        that is passed on to machine_crash_shutdown().
 *
 * When kexec_crash_image is set, the crash image is started through
 * machine_kexec(). Otherwise the same save/shutdown sequence runs and
 * machine_crash_swreset() is called instead (the U-Boot core-dump path
 * described in the surrounding article).
 *
 * Returns without doing anything if kexec_mutex cannot be taken.
 */
void crash_kexec(struct pt_regs *regs)
{
	/*
	 * Declarations are kept at the top of the function: the previous
	 * layout declared these after a printk() inside the else branch,
	 * which is a declaration-after-statement.
	 */
	struct pt_regs fixed_regs;
	extern void machine_crash_swreset(void);

	/* Take the kexec_mutex here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient. But since I reuse the memory...
	 */
	if (!mutex_trylock(&kexec_mutex))
		return;

	if (kexec_crash_image) {
		crash_setup_regs(&fixed_regs, regs);
		crash_save_vmcoreinfo();
		machine_crash_shutdown(&fixed_regs);
		machine_kexec(kexec_crash_image);
	} else {
		/* No crash image loaded: save state, then software reset. */
		printk(KERN_ERR "Enter crash kexec !!\n");
		crash_setup_regs(&fixed_regs, regs);
		crash_save_vmcoreinfo();
		machine_crash_shutdown(&fixed_regs);
		machine_crash_swreset();
	}

	mutex_unlock(&kexec_mutex);
}

die-> crash_kexec->machine_crash_shutdown

如果使用 U-Boot 作为 crash kernel,代码将执行 else 分支。

arch/arm/kernel/machine_kexec.c:

/*
 * machine_crash_shutdown() - stop the other CPUs and save this CPU's state.
 * @regs: register snapshot of the crashing CPU, passed to crash_save_cpu().
 *
 * Disables local interrupts, asks every other CPU to run
 * machine_crash_nonpanic_core(), waits up to one second for them to
 * acknowledge, then records this CPU's registers and masks interrupts.
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
	unsigned long budget_ms;

	local_irq_disable();

	/* Every online CPU except this one must decrement the counter. */
	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
	smp_call_function(machine_crash_nonpanic_core, NULL, false);

	/* Poll in 1 ms steps; give up after at most a second. */
	for (budget_ms = 1000;
	     (atomic_read(&waiting_for_crash_ipi) > 0) && budget_ms;
	     budget_ms--)
		mdelay(1);

	if (atomic_read(&waiting_for_crash_ipi) > 0)
		printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n");

	crash_save_cpu(regs, smp_processor_id());
	machine_kexec_mask_interrupts();

	printk(KERN_INFO "Loading crashdump kernel...\n");
}

这里有个SMP相关的操作:smp_call_function

kernel/smp.c

/**
 * smp_call_function() - run a function on all other CPUs
 * @func: function to run; it must be fast and must not block
 * @info: opaque pointer handed through to @func
 * @wait: when true, wait (atomically) until @func has completed on the
 *        other CPUs
 *
 * With @wait set, the call returns only after @func has returned;
 * otherwise it returns just before the target cpu calls @func.
 *
 * Must not be called with interrupts disabled, from a hardware interrupt
 * handler, or from a bottom half handler.
 *
 * The work is delegated to smp_call_function_many() over cpu_online_mask,
 * with preemption disabled around the call.
 *
 * Return: always 0.
 */
int smp_call_function(smp_call_func_t func, void *info, int wait)
{
	preempt_disable();
	smp_call_function_many(cpu_online_mask, func, info, wait);
	preempt_enable();

	return 0;
}

/*
 * machine_crash_nonpanic_core() - callback run on the CPUs that did not crash.
 * @unused: ignored.
 *
 * Passed to smp_call_function() by machine_crash_shutdown(). Saves this
 * CPU's registers, flushes the cache, acknowledges by decrementing
 * waiting_for_crash_ipi, and then spins forever.
 */
void machine_crash_nonpanic_core(void *unused)
{
	struct pt_regs regs;

	/* "&regs" had been mangled into "®s" by HTML entity decoding. */
	crash_setup_regs(&regs, NULL);
	printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n",
	       smp_processor_id());
	crash_save_cpu(&regs, smp_processor_id());
	flush_cache_all();

	/* machine_crash_shutdown() polls this counter while it waits. */
	atomic_dec(&waiting_for_crash_ipi);

	/* Never returns: park this CPU. */
	while (1)
		cpu_relax();
}

其它的CPU保存寄存器和flush cache 后,进入死循环cpu_relax。

软件复位

machine_kexec.c 文件中实现了 machine_crash_swreset,其中用到一个声明在 arch/arm/include/asm/system_misc.h 中的全局函数指针:extern void (*arm_pm_restart)(char str, const char *cmd);

/*
 * machine_crash_swreset() - software-reset the machine after a crash.
 *
 * Logs a message, flushes the caches, disables the outer cache and then
 * calls the arm_pm_restart hook with mode 0 and no command string.
 * NOTE(review): arm_pm_restart is called without a NULL check; confirm the
 * platform always assigns it before a crash can happen.
 */
void machine_crash_swreset(void)

{

printk(KERN_INFO "Software reset on panic!\n");

/* Flush before the reset; presumably so saved crash data reaches memory - confirm. */
flush_cache_all();

outer_flush_all();

outer_disable();

/* Function pointer assigned by the machine-specific code. */
arm_pm_restart(0, NULL);

}

在mach相关的代码中进行赋值

Core.c (arch\arm\mach-xxx): arm_pm_restart = xxx_restart;

/*
 * xxx_restart() - machine-specific restart handler installed as arm_pm_restart.
 * @mode: not used by this implementation.
 * @cmd:  not used by this implementation.
 *
 * Calls prcm_glb_soft_reset(); presumably a global soft reset issued
 * through the SoC's PRCM block - confirm against the platform code.
 */
static void xxx_restart(char mode, const char *cmd)

{

prcm_glb_soft_reset();

}

至此,kernel crash 发生后的所有事情分析完毕。后面会介绍怎样保存有效的kernel dump文件。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: