您的位置:首页 > 运维架构 > Linux

linux 调度器分析

2017-01-17 19:03 211 查看
CPU 也是一种资源,Linux 调度器负责安排各个进程在 CPU 上的运行。

进程的调度时机发生在下面几种情况:

1、进程状态转换的时刻:进程终止、进程睡眠;

2、当前进程的时间片用完时(在早期 2.4 内核中即 current->counter == 0);

3、设备驱动程序主动调用 schedule(),让出 CPU;

4、进程从中断、异常及系统调用返回到用户态时。

调度器实现主要在kernel/sched/core.c中,

这个文件主要实现调度器框架,具体实现代码在每个调度子类中。

我们看下调度器代码实现:

/*
 * schedule() - entry point for giving up the CPU.
 *
 * Passes the calling task to sched_submit_work() and then calls
 * __schedule(), where the scheduling itself is carried out.
 */
asmlinkage void __sched schedule(void)
{
	/* Hand the current task to sched_submit_work() before switching. */
	sched_submit_work(current);

	__schedule();
}

主要实现在__schedule中,

这个函数先处理当前进程(若它即将睡眠,则将其移出运行队列;否则通过 put_prev_task 把它放回运行队列),然后选择下一个可调度进程,
之后进行进程上下文切换,开始执行下一个进程。

/*
* __schedule() is the main scheduler function.
*
* The main means of driving the scheduler and thus entering this function are:
*
* 1. Explicit blocking: mutex, semaphore, waitqueue, etc.
*
* 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return
* paths. For example, see arch/x86/entry_64.S.
*
* To drive preemption between tasks, the scheduler sets the flag in timer
* interrupt handler scheduler_tick().
*
* 3. Wakeups don't really cause entry into schedule(). They add a
* task to the run-queue and that's it.
*
* Now, if the new task added to the run-queue preempts the current
* task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets
* called on the nearest possible occasion:
*
* - If the kernel is preemptible (CONFIG_PREEMPT=y):
*
* - in syscall or exception context, at the next outmost
* preempt_enable(). (this might be as soon as the wake_up()'s
* spin_unlock()!)
*
* - in IRQ context, return from interrupt-handler to
* preemptible context
*
* - If the kernel is not preemptible (CONFIG_PREEMPT is not set)
* then at the next:
*
* - cond_resched() call
* - explicit schedule() call
* - return from syscall or exception to user-space
* - return from interrupt-handler to user-space
*/
/*
 * This function is entered when a task blocks (for example on a semaphore
 * or a wait queue), or on return from an interrupt or a system call.
 * Waking a task, by contrast, only adds it to the run queue; the task gets
 * to run once the next scheduling opportunity arrives.
 */

static void __sched __schedule(void)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
struct rq *rq;
int cpu;
u64 wallclock;

need_resched:
// Disable preemption while scheduling, to avoid recursive scheduling.
preempt_disable();
// The CPU we are currently running on.
cpu = smp_processor_id();
// The run queue belonging to that CPU.
rq = cpu_rq(cpu);
rcu_note_context_switch(cpu);
prev = rq->curr;// task currently running on this run queue

schedule_debug(prev);

if (sched_feat(HRTICK))
hrtick_clear(rq);

/*
* Make sure that signal_pending_state()->signal_pending() below
* can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
* done by the caller to avoid the race with signal_wake_up().
*/
smp_mb__before_spinlock();
raw_spin_lock_irq(&rq->lock);

#ifdef CONFIG_ARCH_WANTS_CTXSW_LOGGING
dlog("%s: locked %p at %llu\n", __func__, &rq->lock, sched_clock());
#endif
// Default: account this switch in prev's involuntary context-switch counter.
switch_count = &prev->nivcsw;
// prev is not in the running state (state != 0) and schedule() was not
// entered through kernel preemption (PREEMPT_ACTIVE is clear).
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
if (unlikely(signal_pending_state(prev->state, prev))) {
// A signal is pending: keep prev runnable so it can handle it.
prev->state = TASK_RUNNING;
} else {
// No pending signal: remove prev from the run queue.
// Reaching this point means prev is either about to exit or
// about to go to sleep.
deactivate_task(rq, prev, DEQUEUE_SLEEP);
prev->on_rq = 0;

/*
* If a worker went to sleep, notify and ask workqueue
* whether it wants to wake up a task to maintain
* concurrency.
*/
if (prev->flags & PF_WQ_WORKER) {
struct task_struct *to_wakeup;

to_wakeup = wq_worker_sleeping(prev, cpu);
if (to_wakeup)
try_to_wake_up_local(to_wakeup);
}
}
// prev gave up the CPU itself, so count a voluntary switch instead.
switch_count = &prev->nvcsw;
}
// NOTE(review): per the article's author, only the RT scheduling class
// implements this hook, using it to pull a task onto this rq - confirm.
pre_schedule(rq, prev);

if (unlikely(!rq->nr_running))// rq has nothing runnable: presumably pulls a task over from another CPU
idle_balance(cpu, rq);
// Give prev back to its scheduling class (per the author: re-inserted
// into the rb-tree - TODO confirm for non-CFS classes).
put_prev_task(rq, prev);
// Select the next task to run.
next = pick_next_task(rq);
// Take one timestamp and use it for all the accounting calls below.
wallclock = sched_ktime_clock();
if (!prev->on_rq)
task_note_last_sleep(prev, wallclock);
update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
// Clear prev's need_resched flag.
clear_tsk_need_resched(prev);
rq->skip_clock_update = 0;

BUG_ON(task_cpu(next) != cpu_of(rq));

if (likely(prev != next)) {
rq->nr_switches++;
rq->curr = next;
++*switch_count;

#ifdef CONFIG_ARCH_WANTS_CTXSW_LOGGING
dlog("%s: enter context_switch at %llu\n",
__func__, sched_clock());
#endif
// Perform the context switch.
context_switch(rq, prev, next); /* unlocks the rq */
/*
* The context switch have flipped the stack from under us
* and restored the local variables which were saved when
* this task called schedule() in the past. prev == current
* is still correct, but it can be moved to another cpu/rq.
*/
// We may now be running on a different CPU, so re-read cpu and rq.
cpu = smp_processor_id();
rq = cpu_rq(cpu);
} else
raw_spin_unlock_irq(&rq->lock);

post_schedule(rq);
// Re-enable preemption without triggering an immediate reschedule.
sched_preempt_enable_no_resched();
// If the now-current task has TIF_NEED_RESCHED set, schedule again.
if (need_resched())
goto need_resched;
}

接下来我们看下TIF_NEED_RESCHED的用法,在kernel/arch/arm64/kernel/entry.S中

返回用户空间时(ret_to_user),如果 _TIF_WORK_MASK 中有标志位被置位,就会跳转到 work_pending;work_pending 中再测试是否设置了 TIF_NEED_RESCHED,

如果有设置的话,就会调用schedule,重新选择进程运行。
/*
 * Return-to-user path as excerpted here: read the thread flags and branch
 * to work_pending when any bit covered by _TIF_WORK_MASK is set.
 */
ret_to_user:
disable_irq // disable interrupts
ldr x1, [tsk, #TI_FLAGS] // x1 = flags word at offset TI_FLAGS from tsk
and x2, x1, #_TIF_WORK_MASK // x2 = flags selected by _TIF_WORK_MASK
cbnz x2, work_pending // any of them set -> handle pending work
enable_step_tsk x1, x2

work_pending:
tbnz x1, #TIF_NEED_RESCHED, work_resched // TIF_NEED_RESCHED set -> call schedule()
/* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */
ldr x2, [sp, #S_PSTATE] // x2 = value stored at offset S_PSTATE from sp
mov x0, sp // 'regs'
tst x2, #PSR_MODE_MASK // user mode regs?
b.ne no_work_pending // returning to kernel
enable_irq // enable interrupts for do_notify_resume()
bl do_notify_resume
b ret_to_user // loop back and re-check the flags
work_resched:
bl schedule // call schedule() to pick another task
// NOTE(review): the excerpt ends here; what runs after schedule() returns is not shown.

后面会单独介绍cfs,rt 调度算法以及负载均衡实现。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: