
What happens when a thread forks another thread?

Process "Parant"

--> software interrupt int $0x80 [entry.S] <---> _set_gate(idt_table+0x80,15,3,system_call,__KERNEL_CS); [arch/i386/kernel/traps.c]
(the gate installed at vector 0x80 is a trap gate, type 15, with DPL 3 so that user code may invoke it; the handler is system_call, entered on the kernel code segment __KERNEL_CS)

system_call:

pushl %eax /* __NR_fork */

SAVE_ALL

syscall_call:

call *sys_call_table(,%eax,4)
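As a reading aid, here is a C analogy of that indexed call (a sketch, not kernel source; real handlers take their arguments from the saved registers):

typedef long (*syscall_fn)(void);      /* simplified: real handlers take arguments */
extern syscall_fn sys_call_table[];    /* the real table is defined in entry.S */

static long dispatch(unsigned int nr)  /* nr plays the role of %eax */
{
        /* equivalent of "call *sys_call_table(,%eax,4)": each table entry
         * is a 4-byte function pointer on i386, hence the (,%eax,4) scale */
        return sys_call_table[nr]();
}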

/* Control now transfers to the function sys_fork(). ("arch/i386/kernel/process.c") */

asmlinkage int sys_fork(struct pt_regs regs) /* regs comes from the SAVE_ALL on kernel entry

* plus the %eip, %cs, %eflags, %oldesp, %oldss pushed by hardware */

{

return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);

/* the SIGCHLD in clone_flags is the signal the child will send its parent when it exits */

}
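For reference, this is the 2.6-era i386 struct pt_regs (as in include/asm-i386/ptrace.h) that the hardware frame, the pushl %eax and SAVE_ALL together fill in; lower fields sit at lower stack addresses, i.e. they were pushed later:

struct pt_regs {
        long ebx, ecx, edx, esi, edi, ebp, eax; /* pushed by SAVE_ALL */
        int  xds, xes;                          /* pushed by SAVE_ALL */
        long orig_eax;                          /* the "pushl %eax" (__NR_fork) above */
        long eip;                               /* pushed by hardware on int $0x80 */
        int  xcs;                               /*   "  */
        long eflags;                            /*   "  */
        long esp;                               /* only on a user-to-kernel transition */
        int  xss;                               /*   "  */
};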

--> do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL)

--> alloc_pid()

...

--> copy_process(SIGCHLD, regs.esp, &regs, 0, NULL, NULL)

...

--> dup_task_struct(current)

This function allocates a fresh process descriptor and a fresh kernel stack, links the two together, and returns the descriptor, which copy_process() stores in 'p':

struct task_struct *tsk; /* will point to the new process descriptor */

struct thread_info *ti; /* will point to the new kernel stack */

tsk = alloc_task_struct(); /* allocate an empty process descriptor */

ti = alloc_thread_info(tsk); /* allocate an empty kernel stack */

*tsk = *current; /* copy the parent's descriptor wholesale */

tsk->thread_info = ti; /* link descriptor and stack together */

setup_thread_stack(tsk, orig); /* initialize the thread_info on the child's kernel stack

* with the contents of the parent's (current's) thread_info, then link the child's thread_info back to the child's descriptor */
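A minimal sketch of why this pairing matters (assuming the classic 2.6 i386 layout, where the 8 KB kernel stack is THREAD_SIZE-aligned and thread_info sits at its bottom): masking the kernel stack pointer is enough to recover the thread_info, which is exactly how current_thread_info() worked on i386.

#define THREAD_SIZE 8192UL                 /* 8 KB kernel stack, THREAD_SIZE-aligned */

struct thread_info;                        /* opaque for this sketch */

static struct thread_info *current_thread_info_sketch(unsigned long esp)
{
        /* clear the low 13 bits of the kernel stack pointer */
        return (struct thread_info *)(esp & ~(THREAD_SIZE - 1));
}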

...

--> copy_thread(0, clone_flags, regs.esp, 0 /* unused */, p /* the new child */, &regs)

--> childregs = task_pt_regs(p);

This yields the starting address of the saved registers at the top of the child's kernel stack, as shown below:

                 |____thread_info____|  LOW
                 |___________________|
                 |___________________|
                 |___________________|
                 |___________________|
    childregs--> |___________________|
                 |___________________| --> 32 bytes left unused (Ring 0)
                                        HIGH

*childregs = *regs; /* the child's kernel stack now holds all the register values saved when the parent trapped into the kernel */

childregs->eax = 0; /* the child returns 0 from fork */

childregs->esp = regs.esp; /* WHY HERE? Because this esp argument lets clone() hand the child a different user stack; for fork() it is simply the parent's user %esp again */

p->thread.esp = (unsigned long) childregs;

p->thread.esp0 = (unsigned long) (childregs+1); /* record the kernel stack pointers: esp points at the saved registers, esp0 is the stack top loaded into the TSS on the next user-to-kernel transition */

p->thread.eip = (unsigned long) ret_from_fork;
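A sketch of how task_pt_regs() arrives at childregs, reusing THREAD_SIZE and the struct pt_regs layout from the sketches above (the exact number of bytes left unused at the stack top is version-dependent: 8 in some kernels, 32 in the diagram above):

#define KSTK_UNUSED 8UL   /* bytes deliberately left free at the stack top; varies by version */

static struct pt_regs *task_pt_regs_sketch(struct thread_info *ti)
{
        unsigned long top = (unsigned long)ti + THREAD_SIZE;  /* HIGH end of the stack */
        return (struct pt_regs *)(top - KSTK_UNUSED) - 1;     /* step back one pt_regs */
}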

...

perfctr_copy_task(p, regs); /* perfctr patch: set up the child's per-thread performance-counter state */

...

--> if (!(clone_flags & CLONE_STOPPED))

wake_up_new_task(p, clone_flags);

else

p->state = TASK_STOPPED;

--> void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)

--> rq = task_rq_lock(p, &flags);

--> rq = task_rq(p); /* get the runqueue rq of the CPU that p sits on */

--> this_cpu = smp_processor_id();

cpu = task_cpu(p);

--> if (likely(cpu == this_cpu)) {

if (!(clone_flags & CLONE_VM)) {

/* If the child will run on the same CPU as the parent, and parent and

* child do not share the same set of page tables (CLONE_VM cleared),

* force the child to run before the parent by inserting it into the

* parent's runqueue right before the parent. */

if (unlikely(!current->array))

__activate_task(p, rq);

else {

p->prio = current->prio;

p->normal_prio = current->normal_prio;

/*

* Each task_struct descriptor includes a run_list field of type

* list_head. If the process priority is equal to k (a value ranging

* between 0 and 139), the run_list field links the process

* descriptor into the list of runnable processes having priority k.

*/

list_add_tail(&p->run_list, &current->run_list); /* Run child first */

p->array = current->array;

p->array->nr_active++;

inc_nr_running(p, rq);

}

set_need_resched(); /* set the TIF_NEED_RESCHED flag of 'current';

* as seen above, the child is going to run

* immediately */

} else

/* Run child last */

__activate_task(p, rq);

...
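Why does list_add_tail(&p->run_list, &current->run_list) make the child run first? In the kernel's circular doubly-linked lists, adding "at the tail" relative to a given node inserts the new node immediately before it, so the child lands right in front of the parent and a forward walk of the priority list reaches the child first. A self-contained sketch of that insertion:

struct list_head { struct list_head *next, *prev; };

static void list_add_tail_sketch(struct list_head *new_node, struct list_head *head)
{
        new_node->prev = head->prev;   /* new node follows head's old predecessor */
        new_node->next = head;         /* ... and immediately precedes head itself */
        head->prev->next = new_node;
        head->prev = new_node;
}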

--> At this point the current CPU is still executing the parent, i.e. current. current returns from the fork() system call back into entry.S and continues there, with the return value nr (the child's

PID) in %eax

[entry.S]

movl %eax,EAX(%esp) # store the return value

syscall_exit:

cli # make sure we don't miss an interrupt

# setting need_resched or sigpending

# between sampling and the iret

TRACE_IRQS_OFF

movl TI_flags(%ebp), %ecx

testw $_TIF_ALLWORK_MASK, %cx # current->work

jne syscall_exit_work

/* As the kernel's own comment says: if the current process has any extra work pending, jump to syscall_exit_work */

syscall_exit_work:

testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl

jz work_pending # jz jumps when NONE of these trace bits are set,
# falling through to work_pending below

# because wake_up_new_task() set _TIF_NEED_RESCHED via set_need_resched(),

# work_pending will end up calling schedule()

# see the analysis below

TRACE_IRQS_ON

sti # could let do_syscall_trace() call

# schedule() instead

movl %esp, %eax

movl $1, %edx

call do_syscall_trace

/* Once the work above is done, jump to resume_userspace. A system call is a software

* interrupt, i.e. the user asking the kernel to do some work on its behalf, so control must finally return to user space */

jmp resume_userspace

work_pending:

testb $(1<<TIF_NEED_RESCHED), %cl

jz work_notifysig

work_resched:

call schedule

cli

jmp resume_userspace

Finally:

ENTRY(resume_userspace)

cli # make sure we don't miss an interrupt

# setting need_resched or sigpending

# between sampling and the iret

movl TI_flags(%ebp), %ecx

andl $_TIF_WORK_MASK, %ecx # is there any work to be done on

# int/exception return?

jne work_pending

jmp restore_all
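Putting the pieces together, here is a C paraphrase of the exit path from syscall_exit through work_resched to restore_all (a simplified reading aid with illustrative flag values, not kernel source; the work_notifysig signal-delivery branch is omitted):

enum {                                   /* illustrative bit values, not the real ones */
        _TIF_SYSCALL_TRACE = 1 << 0,
        _TIF_SYSCALL_AUDIT = 1 << 1,
        _TIF_SINGLESTEP    = 1 << 2,
        _TIF_NEED_RESCHED  = 1 << 3,
        _TIF_ALLWORK_MASK  = 0xf,
};

extern void do_syscall_trace(void);      /* stand-ins for the real routines */
extern void schedule(void);

static void syscall_exit_flow_sketch(volatile unsigned long *tif_flags)
{
        for (;;) {
                unsigned long flags = *tif_flags;  /* movl TI_flags(%ebp), %ecx */
                if (!(flags & _TIF_ALLWORK_MASK))
                        break;                     /* no work pending: restore_all */
                /* syscall_exit_work: */
                if (flags & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)) {
                        do_syscall_trace();        /* tracer work, then resume_userspace */
                        continue;
                }
                /* work_pending / work_resched: */
                if (flags & _TIF_NEED_RESCHED)     /* set by wake_up_new_task() */
                        schedule();                /* the parent yields; the child may run */
                /* back to resume_userspace, which re-checks the flags */
        }
        /* restore_all: pop the saved registers and iret back to user mode */
}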

--> Because the parent called schedule(), the CPU switches to a new task next, and that task is very likely the child the parent just created

asmlinkage void __sched schedule(void)

...

--> idx = sched_find_first_bit(array->bitmap); /* this fixed-size bitmap scan is why the algorithm is O(1) */

queue = array->queue + idx; /* the list of runnable tasks at the highest priority */

next = list_entry(queue->next, struct task_struct, run_list);
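A short illustration of why this lookup is O(1): the priority bitmap has a fixed 140 bits (MAX_PRIO) no matter how many tasks are runnable, so finding the first set bit is a constant-bounded scan. find_first_bit_sketch() below is a hypothetical stand-in for the kernel's sched_find_first_bit() (using the GCC builtin __builtin_ctzl):

#define MAX_PRIO      140
#define BITS_PER_LONG (8 * (int)sizeof(unsigned long))
#define BITMAP_WORDS  ((MAX_PRIO + BITS_PER_LONG - 1) / BITS_PER_LONG)

static int find_first_bit_sketch(const unsigned long bitmap[BITMAP_WORDS])
{
        for (int i = 0; i < BITMAP_WORDS; i++)   /* at most 5 words on i386 */
                if (bitmap[i])
                        return i * BITS_PER_LONG + __builtin_ctzl(bitmap[i]);
        return MAX_PRIO;                         /* empty bitmap: nothing runnable */
}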

...

--> If next is the child, the child starts executing at ret_from_fork,

because copy_thread() set p->thread.eip = (unsigned long) ret_from_fork;

--> ENTRY(ret_from_fork)

pushl %eax # pass prev (left in %eax by __switch_to) to schedule_tail

call schedule_tail # invokes the finish_task_switch() function

# to complete the process switch

popl %eax # balance the stack; the child's user-visible %eax (0) is restored later from childregs at restore_all

pushl $0x0202 # Reset kernel eflags

popfl

jmp syscall_exit # see the analysis of syscall_exit above

...

restore_all:

...

iret <-- this is the point where the hardware performance counters start counting!

At the very end, the child executes iret and truly returns from fork ...

iret pops %eip, %cs and %eflags from the kernel stack in one go; what happens next depends on the transition:

    kernel mode -> user mode:   iret additionally pops %oldesp and %oldss

    kernel mode -> kernel mode: iret's work ends there

And as analyzed above, *childregs = *regs, i.e. the child's kernel stack holds exactly the register values the parent saved when it trapped into the kernel.

So the parent and the child resume execution at the same user-space code, differing only in the return value.
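A user-space demonstration of this conclusion: parent and child resume at the same instruction after fork(), distinguished only by the value that copy_thread() left in the child's %eax, surfaced as fork()'s return value.

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        pid_t pid = fork();              /* both processes return from this call */

        if (pid == 0) {                  /* child: childregs->eax was set to 0 */
                printf("child:  fork() returned 0, my pid is %d\n", (int)getpid());
        } else {                         /* parent: %eax holds the child's pid */
                printf("parent: fork() returned %d\n", (int)pid);
                wait(NULL);              /* reap the child */
        }
        return 0;
}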