您的位置：首页 > 运维架构 > Linux

linux内核分析之调度——实时调度算法

2011-12-30 17:30 225 查看

linux内核中提供了两种实时调度策略：SCHED_FIFO和SCHED_RR，其中RR是带有时间片的FIFO。这两种调度算法实现的都是静态优先级。内核不为实时进程计算动态优先级。这能保证给定优先级别的实时进程总能抢占优先级比他低得进程。linux的实时调度算法提供了一种软实时工作方式。实时优先级范围从0到MAX_RT_PRIO减一。默认情况下，MAX_RT_PRIO为100，所以默认的实时优先级范围是从0到99.SCHED_NORMAL级进程的nice值共享了这个取值空间；他的取值范围是从MAX_RT_PRIO到MAX_RT_PRIO+40.也就是说，在默认情况下，nice值从-20到19直接对应的是从100到139的实时优先级范围。

数据结构

实时调度的优先级队列

实时调度的优先级队列是一组链表，每个优先级对应一个链表。

/*实时调度的优先级队列，实时调度中，运行进程
 根据优先级放到对应的队列里面，对于相同的优先级
 的进程后面来的进程放到同一优先级队列的队尾
 对于FIFO/RR调度，各自的进程需要设置相关的属性
 进程运行时，要根据task中的这些属性判断和设置
 放弃cpu的时机(运行完或是时间片用完)*/
struct rt_prio_array {
	DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
	struct list_head queue[MAX_RT_PRIO];
};

运行队列

运行队列组织实时调度的相关信息

/* Real-Time classes' related field in a runqueue: */
struct rt_rq {
	struct rt_prio_array active;
	unsigned long rt_nr_running;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
	struct {
		int curr; /* highest queued rt task prio */
#ifdef CONFIG_SMP
		int next; /* next highest */
#endif
	} highest_prio;
#endif
#ifdef CONFIG_SMP
	unsigned long rt_nr_migratory;
	unsigned long rt_nr_total;
	int overloaded;
	struct plist_head pushable_tasks;
#endif
	int rt_throttled;
	u64 rt_time;
	u64 rt_runtime;
	/* Nests inside the rq lock: */
	spinlock_t rt_runtime_lock;

#ifdef CONFIG_RT_GROUP_SCHED
	unsigned long rt_nr_boosted;

	struct rq *rq;
	struct list_head leaf_rt_rq_list;
	struct task_group *tg;
	struct sched_rt_entity *rt_se;
#endif
};

进程调度实体

struct sched_rt_entity {
	struct list_head run_list;
	unsigned long timeout;
	unsigned int time_slice;
	int nr_cpus_allowed;

	struct sched_rt_entity *back;
#ifdef CONFIG_RT_GROUP_SCHED
	struct sched_rt_entity	*parent;
	/* rq on which this entity is (to be) queued: */
	struct rt_rq		*rt_rq;
	/* rq "owned" by this entity/group: */
	struct rt_rq		*my_q;
#endif
};

调度类

static const struct sched_class rt_sched_class = {
	.next			= &fair_sched_class,
	.enqueue_task		= enqueue_task_rt,
	.dequeue_task		= dequeue_task_rt,
	.yield_task		= yield_task_rt,

	.check_preempt_curr	= check_preempt_curr_rt,

	.pick_next_task		= pick_next_task_rt,
	.put_prev_task		= put_prev_task_rt,

#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_rt,

	.load_balance		= load_balance_rt,
	.move_one_task		= move_one_task_rt,
	.set_cpus_allowed       = set_cpus_allowed_rt,
	.rq_online              = rq_online_rt,
	.rq_offline             = rq_offline_rt,
	.pre_schedule		= pre_schedule_rt,
	.post_schedule		= post_schedule_rt,
	.task_wake_up		= task_wake_up_rt,
	.switched_from		= switched_from_rt,
#endif

	.set_curr_task          = set_curr_task_rt,
	.task_tick		= task_tick_rt,

	.get_rr_interval	= get_rr_interval_rt,

	.prio_changed		= prio_changed_rt,
	.switched_to		= switched_to_rt,
};

从运行队列中移除函数dequeue_task_rt

static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
{
	struct sched_rt_entity *rt_se = &p->rt;
	/*更新调度信息*/
	update_curr_rt(rq);
	/*实际工作，将rt_se从运行队列中删除然后
	添加到队列尾部*/
	dequeue_rt_entity(rt_se);
	/*从hash表中删除*/
	dequeue_pushable_task(rq, p);
}

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	struct sched_rt_entity *rt_se = &curr->rt;
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	u64 delta_exec;

	if (!task_has_rt_policy(curr))/*判断是否问实时调度进程*/
		return;
	/*执行时间*/
	delta_exec = rq->clock - curr->se.exec_start;
	if (unlikely((s64)delta_exec < 0))
		delta_exec = 0;

	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
	/*更新当前进程的总的执行时间*/
	curr->se.sum_exec_runtime += delta_exec;
	account_group_exec_runtime(curr, delta_exec);
	/*更新执行的开始时间*/
	curr->se.exec_start = rq->clock;
	cpuacct_charge(curr, delta_exec);/*主调度相关*/
	/*更新调度信息*/
	sched_rt_avg_update(rq, delta_exec);

	if (!rt_bandwidth_enabled())
		return;

	for_each_sched_rt_entity(rt_se) {
		rt_rq = rt_rq_of_se(rt_se);

		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
			spin_lock(&rt_rq->rt_runtime_lock);
			rt_rq->rt_time += delta_exec;
			if (sched_rt_runtime_exceeded(rt_rq))
				resched_task(curr);
			spin_unlock(&rt_rq->rt_runtime_lock);
		}
	}
}

static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
	/*从运行队列中删除*/
	dequeue_rt_stack(rt_se);

	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = group_rt_rq(rt_se);

		if (rt_rq && rt_rq->rt_nr_running)
			__enqueue_rt_entity(rt_se);/*添加到队列尾*/
	}
}

/*
 * Because the prio of an upper entry depends on the lower
 * entries, we must remove entries top - down.
 */
static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
{
	struct sched_rt_entity *back = NULL;

	for_each_sched_rt_entity(rt_se) {/*遍历整个组调度实体*/
		rt_se->back = back;/*可见rt_se的back实体为组调度中前一个调度实体*/
		back = rt_se;
	}
	/*将组中的所有进程从运行队列中移除*/
	for (rt_se = back; rt_se; rt_se = rt_se->back) {
		if (on_rt_rq(rt_se))
			__dequeue_rt_entity(rt_se);
	}
}

删除的实际工作为：

static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;

	list_del_init(&rt_se->run_list);
	if (list_empty(array->queue + rt_se_prio(rt_se)))
		__clear_bit(rt_se_prio(rt_se), array->bitmap);

	dec_rt_tasks(rt_se, rt_rq);
}

添加进程到运行队列

/*
 * Adding/removing a task to/from a priority array:
 */
static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
{
	struct sched_rt_entity *rt_se = &p->rt;

	if (wakeup)
		rt_se->timeout = 0;
	/*实际工作*/
	enqueue_rt_entity(rt_se);

	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);/*添加到对应的hash表中*/
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
{
	dequeue_rt_stack(rt_se);/*从运行队列中删除*/
	for_each_sched_rt_entity(rt_se)
		__enqueue_rt_entity(rt_se);/*添加到运行队列尾部*/
}

实际的添加工作：

static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;
	struct rt_rq *group_rq = group_rt_rq(rt_se);
	struct list_head *queue = array->queue + rt_se_prio(rt_se);

	/*
	 * Don't enqueue the group if its throttled, or when empty.
	 * The latter is a consequence of the former when a child group
	 * get throttled and the current group doesn't have any other
	 * active members.
	 */
	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
		return;

	list_add_tail(&rt_se->run_list, queue);
	__set_bit(rt_se_prio(rt_se), array->bitmap);

	inc_rt_tasks(rt_se, rt_rq);
}

选择下一个进程运行

static struct task_struct *pick_next_task_rt(struct rq *rq)
{
	struct task_struct *p = _pick_next_task_rt(rq);/*实际工作*/

	/* The running task is never eligible for pushing */
	if (p)
		dequeue_pushable_task(rq, p);

#ifdef CONFIG_SMP
	/*
	 * We detect this state here so that we can avoid taking the RQ
	 * lock again later if there is no need to push
	 */
	rq->post_schedule = has_pushable_tasks(rq);
#endif

	return p;
}

static struct task_struct *_pick_next_task_rt(struct rq *rq)
{
	struct sched_rt_entity *rt_se;
	struct task_struct *p;
	struct rt_rq *rt_rq;

	rt_rq = &rq->rt;

	if (unlikely(!rt_rq->rt_nr_running))
		return NULL;

	if (rt_rq_throttled(rt_rq))
		return NULL;

	do {/*遍历组调度中的每个进程*/
		rt_se = pick_next_rt_entity(rq, rt_rq);
		BUG_ON(!rt_se);
		rt_rq = group_rt_rq(rt_se);
	} while (rt_rq);

	p = rt_task_of(rt_se);
	/*更新执行域*/
	p->se.exec_start = rq->clock;

	return p;
}

static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
						   struct rt_rq *rt_rq)
{
	struct rt_prio_array *array = &rt_rq->active;
	struct sched_rt_entity *next = NULL;
	struct list_head *queue;
	int idx;
	/*找到第一个可用的*/
	idx = sched_find_first_bit(array->bitmap);
	BUG_ON(idx >= MAX_RT_PRIO);
	/*从链表组中找到对应的链表*/
	queue = array->queue + idx;
	next = list_entry(queue->next, struct sched_rt_entity, run_list);
	/*返回找到的运行实体*/
	return next;
}

总结：对于实时调度，基于linux内核调度框架下作的工作比较简单，把所有的运行进程根据优先级放到不用的队列里面，采用位图方式进行使用记录。进队列仅仅是删除原来队列里面的本进程，然后将他挂到队列尾部；而对于“移除”操作，也仅仅是从队列里面移除后添加到运行队列尾部。

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航