您的位置:首页 > 运维架构 > Linux

Linux调度时机

2016-09-16 21:21 204 查看
上篇介绍了调度http://blog.csdn.net/kklvsports/article/details/52494246,其中也提到了调度时机有,中断,异常;进程主动退出,睡眠;进程创建,被唤醒,优先级改变时;进程阻塞,如阻塞在信号量,互斥锁。本章针对各个场景分析

1. 中断异常

s3c2410_timer_init-->setup_irq(IRQ_TIMER4, &s3c2410_timer_irq)设置s3c2410_timer_interrupt为2410时钟中断处理程序。s3c2410_timer_interrupt-->update_process_times-->scheduler_tick

/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
*
* It also gets called by the fork code, when changing the parent's
* timeslices.
*/
void scheduler_tick(void)
{
int cpu = smp_processor_id();// current (logical) CPU number
struct rq *rq = cpu_rq(cpu); // this CPU's runqueue; every CPU owns one rq
struct task_struct *curr = rq->curr;// task currently running on this runqueue

sched_clock_tick();

spin_lock(&rq->lock);
update_rq_clock(rq); // refresh rq->clock to the current timestamp
update_cpu_load(rq); // update the runqueue load statistics (cpu_load[])
curr->sched_class->task_tick(rq, curr, 0);// periodic-tick hook of the current task's scheduling class
spin_unlock(&rq->lock);

perf_event_task_tick(curr, cpu);

#ifdef CONFIG_SMP
rq->idle_at_tick = idle_cpu(cpu);
trigger_load_balance(rq, cpu);  // raise SCHED_SOFTIRQ if it is time for periodic load balancing
#endif
}
由上可见scheduler_tick函数调用当前进程所属调度类的task_tick方法,CFS调度类对应的方法是task_tick_fair

/*
 * Periodic scheduler tick hitting a task of the fair (CFS) class:
 * walk the task's scheduling-entity hierarchy via for_each_sched_entity
 * and run the per-entity tick handling on each entity's cfs runqueue.
 */
static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
{
	struct sched_entity *entity = &curr->se;
	struct cfs_rq *entity_rq;

	for_each_sched_entity(entity) {
		entity_rq = cfs_rq_of(entity);
		entity_tick(entity_rq, entity, queued);
	}
}

static void
entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
{
/*
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq);// refresh the current entity's runtime accounting (vruntime etc.)

#ifdef CONFIG_SCHED_HRTICK
/*
* queued ticks are scheduled to match the slice, so don't bother
* validating it and just reschedule.
*/
if (queued) {
resched_task(rq_of(cfs_rq)->curr);
return;
}
/*
* don't let the period tick interfere with the hrtick preemption
*/
if (!sched_feat(DOUBLE_TICK) &&
hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
return;
#endif

if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT))
check_preempt_tick(cfs_rq, curr);// request preemption if current has run past its ideal slice
}

关键函数update_curr和check_preempt_tick,分别分析之

update_curr-->__update_curr-->calc_delta_fair

static void update_curr(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
u64 now = rq_of(cfs_rq)->clock;
unsigned long delta_exec;

if (unlikely(!curr))
return;

/*
* Get the amount of time the current task was running
* since the last time we changed load (this cannot
* overflow on 32 bits):
*/
delta_exec = (unsigned long)(now - curr->exec_start);// wall-clock time run since exec_start was last stamped; also reached on the schedule() path when the task is switched out
if (!delta_exec)
return;

__update_curr(cfs_rq, curr, delta_exec);// fold the delta into vruntime (load-weighted) and the runtime totals
curr->exec_start = now;

if (entity_is_task(curr)) {
struct task_struct *curtask = task_of(curr);

trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
cpuacct_charge(curtask, delta_exec);
account_group_exec_runtime(curtask, delta_exec);
}
}
/*
* Update the current task's runtime statistics. Skip current tasks that
* are not in our scheduling class.
*/
static inline void
__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
unsigned long delta_exec)
{
unsigned long delta_exec_weighted;

schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));

curr->sum_exec_runtime += delta_exec;// lifetime total of real execution time across all slices
schedstat_add(cfs_rq, exec_clock, delta_exec);
delta_exec_weighted = calc_delta_fair(delta_exec, curr);// scale the real delta by the entity's load weight
curr->vruntime += delta_exec_weighted;// advance the entity's virtual runtime
update_min_vruntime(cfs_rq);// keep cfs_rq->min_vruntime current
}
/*
 * Scale a real-time delta by the entity's load weight (delta /= w).
 * An entity at the default NICE_0_LOAD weight accrues virtual time at
 * wall-clock speed, so its delta passes through unchanged.
 */
static inline unsigned long
calc_delta_fair(unsigned long delta, struct sched_entity *se)
{
	if (unlikely(se->load.weight != NICE_0_LOAD))
		return calc_delta_mine(delta, NICE_0_LOAD, &se->load);

	return delta;
}
/*
* Maintain cfs_rq->min_vruntime: the candidate value is the smaller of
* the currently running entity's vruntime and the leftmost (smallest)
* queued entity's vruntime; min_vruntime then only ever advances
* (max with its previous value), so it is monotonic.
*/
static void update_min_vruntime(struct cfs_rq *cfs_rq)
{
u64 vruntime = cfs_rq->min_vruntime;

if (cfs_rq->curr)
vruntime = cfs_rq->curr->vruntime;

if (cfs_rq->rb_leftmost) {
struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
struct sched_entity,
run_node);

if (!cfs_rq->curr)
vruntime = se->vruntime;
else
vruntime = min_vruntime(vruntime, se->vruntime);
}

cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
}

可见update_curr主要功能是更新当前任务的vruntime,sum_exec_runtime等;
下面分析check_preempt_tick。

/*
* Preempt the current task with a newly woken task if needed:
*/
static void
check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
unsigned long ideal_runtime, delta_exec;

ideal_runtime = sched_slice(cfs_rq, curr);// ideal slice derived from the task's weight and the scheduling period
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;// real (not virtual) time run in the current slice
if (delta_exec > ideal_runtime) { // the task has exhausted its ideal slice
resched_task(rq_of(cfs_rq)->curr); // set TIF_NEED_RESCHED; the actual switch happens later, e.g. on return to user space
/*
* The current task ran long enough, ensure it doesn't get
* re-elected due to buddy favours.
*/
clear_buddies(cfs_rq, curr);
return;
}

/*
* Ensure that a task that missed wakeup preemption by a
* narrow margin doesn't have to wait for a full slice.
* This also mitigates buddy induced latencies under load.
*/
if (!sched_feat(WAKEUP_PREEMPT))
return;

if (delta_exec < sysctl_sched_min_granularity)// below the minimum granularity: never preempt this early
return;
/* In virtual-time terms: if current's vruntime leads the next entity's
* by more than the ideal slice, request a reschedule (TIF_NEED_RESCHED).
* NOTE(review): the original text says __pick_next_entity takes the
* second-from-left rbtree node; it appears to pick the next entity to
* run (the running task is not enqueued) — confirm against its definition.
*/
if (cfs_rq->nr_running > 1) {
struct sched_entity *se = __pick_next_entity(cfs_rq);
s64 delta = curr->vruntime - se->vruntime;

if (delta > ideal_runtime)
resched_task(rq_of(cfs_rq)->curr);
}
}

可见check_preempt_tick检查当前任务运行时间是否大于理想运行时间,如果大于设置标志TIF_NEED_RESCHED。
总和上述分析sheduler_tick按照系统时钟频率的频度更新当前任务的运行时间,并检查运行时间是否大于理想时长,如果大于就设置TIF_NEED_RESCHED,但是何时调度呢?当进程从中断,异常,系统调用返回用户态前都会调用ret_from_sys_call(体系结构相关汇编函数),该函数会检查TIF_NEED_RESCHED标志,如果置位则调用schedule调度程序。另外打开抢占开关preempt_enable-->preempt_check_resched的时候也会检查该标志,进而spin_unlock-->preempt_enable也会检查标志。

2. 进程主动退出,睡眠

这个应该很好理解,不再赘述。

3. 进程创建,被唤醒,优先级改变

例如创建进程时如下流程设置TIF_NEED_RESCHED标志,do_fork-->wake_up_new_task-->check_preempt_curr-->check_preempt_wakeup-->resched_task-->set_tsk_need_resched,这个流程的含义是新创建的进程如果可以抢占当前进程,则对当前进程设置TIF_NEED_RESCHED标志,待返回用户态前检查调度。是否可以抢占看wakeup_preempt_entity

/*
* Should 'se' preempt 'curr'.
*
* |s1
* |s2
* |s3
* g
* |<--->|c
*
* w(c, s1) = -1
* w(c, s2) = 0
* w(c, s3) = 1
*
*/
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
{
s64 gran, vdiff = curr->vruntime - se->vruntime;

if (vdiff <= 0)
return -1;

gran = wakeup_gran(curr, se);// wakeup granularity, converted to virtual time
if (vdiff > gran) // curr's vruntime leads the woken entity's by more than the granularity: preempt
return 1;

return 0;
}static unsigned long
wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
{
unsigned long gran = sysctl_sched_wakeup_granularity;// base run time a freshly woken task gets before it may preempt; only a preemption threshold, not a guaranteed minimum runtime (that is sysctl_sched_min_granularity) — the smaller it is, the more easily preemption occurs

if (cfs_rq_of(curr)->curr && sched_feat(ADAPTIVE_GRAN))
gran = adaptive_gran(curr, se);

/*
* Since its curr running now, convert the gran from real-time
* to virtual-time in his units.
*/
if (sched_feat(ASYM_GRAN)) {
/*
* By using 'se' instead of 'curr' we penalize light tasks, so
* they get preempted easier. That is, if 'se' < 'curr' then
* the resulting gran will be larger, therefore penalizing the
* lighter, if otoh 'se' > 'curr' then the resulting gran will
* be smaller, again penalizing the lighter task.
*
* This is especially important for buddies when the leftmost
* task is higher priority than the buddy.
*/
if (unlikely(se->load.weight != NICE_0_LOAD))
gran = calc_delta_fair(gran, se);
} else {
if (unlikely(curr->load.weight != NICE_0_LOAD))
gran = calc_delta_fair(gran, curr);
}

return gran;
}4. 进程阻塞,如阻塞在信号量,互斥锁

以互斥锁为例,如果进程获取锁失败会主动调用schedule出让CPU,流程如下mutex_lock-->__mutex_lock_slowpath-->__mutex_lock_common-->schedule,源码不再分析
综合上述:实际上从根本上讲调度时机分两种,一是当前运行进程主动调用schedule出让CPU,如上述2,4;一种是当前运行进程被设置TIF_NEED_RESCHED标志,如进程运行时间到,或者被抢占,对应1,3

参考:http://www.cnblogs.com/Daniel-G/p/3307298.html

http://blog.csdn.net/arriod/article/details/7033895
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: