您的位置:首页 > 理论基础 > 数据结构算法

timer 子系统的初始化过程

2011-11-29 21:59 369 查看

概览:

系统刚上电时,需要注册 IRQ0 时钟中断,完成时钟源设备,时钟事件设备,tick device 等初始化操作并选择合适的工作模式。由于刚启动时没有特别重要的任务要做,因此默认是进入低精度 + 周期 tick 的工作模式,之后会根据硬件的配置(如硬件上是否支持 HPET 等高精度 timer)和软件的配置(如是否通过命令行参数或者内核配置使能了高精度 timer 等特性)进行切换。在一个支持 hrtimer 高精度模式并使能了 dynamic tick 的系统中,第一次发生 IRQ0 的软中断时 hrtimer
就会进行从低精度到高精度的切换,然后再进一步切换到 NOHZ 模式。IRQ0 为系统的时钟中断,使用全局的时钟事件设备(global_clock_event)来处理的,其定义如下:

/* Action descriptor for IRQ0, the system timer interrupt; handled by timer_interrupt. */
static struct irqaction irq0 = {
	.name		= "timer",
	.handler	= timer_interrupt,
	.flags		= IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
};

它的中断处理函数 timer_interrupt 的简化实现如清单 1 所示:

清单 1. IRQ0 中断处理函数的简化实现
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
. . . .

global_clock_event->event_handler(global_clock_event);
. . . .
return IRQ_HANDLED;
}

在 global_clock_event->event_handler 的处理中,除了更新 local CPU 上运行进程时间的统计,profile 等工作,更重要的是要完成更新 jiffies 等全局操作。这个全局的时钟事件设备的 event_handler 根据使用环境的不同,在低精度模式下可能是 tick_handle_periodic / tick_handle_periodic_broadcast,在高精度模式下是 hrtimer_interrupt。目前只有 HPET 或者 PIT 可以作为
global_clock_event 使用。其初始化流程如清单 2 所示:

清单 2. timer 子系统的初始化流程
/*
 * Nothing is initialised here directly: the function only records
 * x86_late_time_init in the late_time_init hook.
 */
void __init time_init(void)
{
	late_time_init = x86_late_time_init;
}

/*
 * Late timer setup: invoke the platform timer_init callback first,
 * then initialise the TSC.
 */
static __init void x86_late_time_init(void)
{
	x86_init.timers.timer_init();
	tsc_init();
}

/*
 * Target of the x86_init.timers.timer_init callback pointer.
 *
 * Tries the HPET first; when hpet_enable() returns 0 the PIT is set up
 * instead. The default timer IRQ is installed in either case.
 */
void __init hpet_time_init(void)
{
	if (hpet_enable() == 0)
		setup_pit_timer();

	setup_default_timer_irq();
}

由清单 2 可以看到,系统优先使用 HPET 作为 global_clock_event,只有在 HPET 没有使能时,PIT 才有机会成为 global_clock_event。在使能 HPET 的过程中,HPET 会同时被注册为时钟源设备和时钟事件设备。

hpet_enable
hpet_clocksource_register
hpet_legacy_clockevent_register
clockevents_register_device(&hpet_clockevent);

clockevents_register_device 会触发 CLOCK_EVT_NOTIFY_ADD 事件,通知框架为新设备创建对应的 tick device。该事件由处理函数 tick_notify 接收,并在其中添加新的 tick device。

clockevents_register_device triggers event CLOCK_EVT_NOTIFY_ADD
tick_notify receives event CLOCK_EVT_NOTIFY_ADD
tick_check_new_device
tick_setup_device

在 tick device 的设置过程中,会根据新加入的时钟事件设备是否使用 broadcast 来分别设置 event_handler。tick device 在不同模式下的处理函数如下表所示:

表 2. tick device 在不同模式下的处理函数
| | low resolution mode | high resolution mode |
| --- | --- | --- |
| periodic tick | tick_handle_periodic | hrtimer_interrupt |
| dynamic tick | tick_nohz_handler | hrtimer_interrupt |
另外,在系统运行的过程中,可以通过查看 /proc/timer_list 来显示系统当前配置的所有时钟的详细情况,譬如当前系统活动的时钟源设备,时钟事件设备,tick device 等。也可以通过查看 /proc/timer_stats 来查看当前系统中所有正在使用的 timer 的统计信息,包括所有正在使用 timer 的进程,启动 / 停止 timer 的函数,timer 使用的频率等信息。内核需要配置 CONFIG_TIMER_STATS=y,而且在系统启动时这个功能是关闭的,需要通过命令 "echo 1 >/proc/timer_stats" 激活。/proc/timer_stats 的显示格式如下所示(原文此处未给出示例输出):


细述:

(1)tick_init

/**
 * tick_init - initialize the tick control
 *
 * Hooks tick_notifier into the clockevents framework. The registration
 * goes through the kernel's notifier-chain mechanism.
 */
void __init tick_init(void)
{
	clockevents_register_notifier(&tick_notifier);
}

/* Notifier block handed to the clockevents framework; its callback is tick_notify. */
static struct notifier_block tick_notifier = {
    .notifier_call = tick_notify,
};

/**
 * clockevents_register_notifier - register a clock events change listener
 * @nb: notifier block to add to clockevents_chain
 *
 * The chain is modified while holding clockevents_lock with the local
 * interrupt state saved. Returns the result of
 * raw_notifier_chain_register().
 */
int clockevents_register_notifier(struct notifier_block *nb)
{
    unsigned long irq_state;
    int rc;

    raw_spin_lock_irqsave(&clockevents_lock, irq_state);
    rc = raw_notifier_chain_register(&clockevents_chain, nb);
    raw_spin_unlock_irqrestore(&clockevents_lock, irq_state);

    return rc;
}

/*
 *    Raw notifier chain routines.  There is no protection;
 *    the caller must provide it.  Use at your own risk!
 */

/**
 *    raw_notifier_chain_register - Add notifier to a raw notifier chain
 *    @nh: Pointer to head of the raw notifier chain
 *    @n: New entry in notifier chain
 *
 *    Adds a notifier to a raw notifier chain.
 *    All locking must be provided by the caller.
 *
 *    Currently always returns zero.
 */
int raw_notifier_chain_register(struct raw_notifier_head *nh,
        struct notifier_block *n)
{
    /* Thin wrapper: forwards to notifier_chain_register() on the chain's head pointer. */
    return notifier_chain_register(&nh->head, n);
}


可以看到,tick_init 的作用就是调用 clockevents_register_notifier 函数向 clockevents_chain 通知链注册元素 tick_notifier。这个元素的回调函数指明了当时钟事件设备信息发生变化(例如新加入一个时钟事件设备等等)时,应该执行的操作,该回调函数为 tick_notify。

(2)init_timers()
/*
 * Boot-time setup of the low-resolution (timer wheel) timers.
 */
void __init init_timers(void)
{
	int err;

	/*
	 * Set up the timer data structures of the CPU we are running on by
	 * issuing the CPU_UP_PREPARE notification by hand. This runs during
	 * early init, so the current CPU is the boot CPU.
	 */
	err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
			       (void *)(long)smp_processor_id());

	init_timer_stats();

	BUG_ON(err != NOTIFY_OK);

	/*
	 * Register timers_nb as a CPU notifier; its callback is used to set
	 * up the timer data structures of a given CPU.
	 */
	register_cpu_notifier(&timers_nb);

	/* Install the handler of the timer softirq. */
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}

这个函数完成的主要作用包括:

(1)初始化本 CPU 上的软件时钟相关的数据结构;

(2)向 cpu_chain 通知链注册元素 timers_nb ,该元素的回调函数用于初始化指定 CPU 上的软件时钟相关的数据结构;

(3)初始化时钟的软中断处理函数。

对于操作(1):

/*
 * CPU notifier callback for the timer code.
 *
 * @hcpu carries the CPU number. CPU_UP_PREPARE(_FROZEN) initialises that
 * CPU's timer structures; with CONFIG_HOTPLUG_CPU, CPU_DEAD(_FROZEN)
 * migrates its timers. Every other action is ignored.
 *
 * Returns NOTIFY_OK, or the notifier encoding of the error when
 * init_timers_cpu() fails.
 */
static int __cpuinit timer_cpu_notify(struct notifier_block *self,
				      unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	int rc;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		rc = init_timers_cpu(cpu);
		if (rc < 0)
			return notifier_from_errno(rc);
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		migrate_timers(cpu);
		break;
#endif
	default:
		break;
	}

	return NOTIFY_OK;
}

调用init_timers_cpu:

/*
 * Set up the timer wheel (struct tvec_base) used by @cpu.
 *
 * The first caller is given the statically allocated boot_tvec_bases.
 * Later first-time callers get a zeroed, node-local allocation, and a
 * CPU that was set up before reuses its existing per-CPU base.
 *
 * Returns 0 on success. Returns -ENOMEM when the allocation fails or
 * the allocated base does not pass the tbase_get_deferrable() check.
 */
static int __cpuinit init_timers_cpu(int cpu)
{
	static char __cpuinitdata tvec_base_done[NR_CPUS];
	struct tvec_base *base;
	int slot;

	if (tvec_base_done[cpu]) {
		/* This CPU was set up before: reuse its per-CPU base. */
		base = per_cpu(tvec_bases, cpu);
	} else {
		static char boot_done;

		if (!boot_done) {
			/*
			 * This is for the boot CPU - we use compile-time
			 * static initialisation because per-cpu memory isn't
			 * ready yet and because the memory allocators are not
			 * initialised either.
			 */
			boot_done = 1;
			base = &boot_tvec_bases;
		} else {
			/* The APs use this path later in boot. */
			base = kmalloc_node(sizeof(*base),
					    GFP_KERNEL | __GFP_ZERO,
					    cpu_to_node(cpu));
			if (!base)
				return -ENOMEM;

			/* Make sure that tvec_base is 2 byte aligned */
			if (tbase_get_deferrable(base)) {
				WARN_ON(1);
				kfree(base);
				return -ENOMEM;
			}
			per_cpu(tvec_bases, cpu) = base;
		}
		tvec_base_done[cpu] = 1;
	}

	spin_lock_init(&base->lock);

	/* Empty every bucket of the five timer vectors tv1..tv5. */
	for (slot = 0; slot < TVN_SIZE; slot++) {
		INIT_LIST_HEAD(&base->tv5.vec[slot]);
		INIT_LIST_HEAD(&base->tv4.vec[slot]);
		INIT_LIST_HEAD(&base->tv3.vec[slot]);
		INIT_LIST_HEAD(&base->tv2.vec[slot]);
	}
	for (slot = 0; slot < TVR_SIZE; slot++)
		INIT_LIST_HEAD(&base->tv1.vec[slot]);

	/* Both bookkeeping fields start out at the current jiffies value. */
	base->timer_jiffies = jiffies;
	base->next_timer = base->timer_jiffies;
	return 0;
}

对于操作(3),open_softirq(TIMER_SOFTIRQ, run_timer_softirq):

/* Install @action as the handler stored in softirq_vec for softirq number @nr. */
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}

我们看到,定时器软中断所对应的action是run_timer_softirq,也就是当时钟中断到来,软中断启动时,就会调用这个函数,我们来看一下这个函数:

/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static void run_timer_softirq(struct softirq_action *h)
{
	struct tvec_base *base = __this_cpu_read(tvec_bases);

	hrtimer_run_pending();

	/* Nothing is due until jiffies has reached base->timer_jiffies. */
	if (!time_after_eq(jiffies, base->timer_jiffies))
		return;

	__run_timers(base);
}

/**
* __run_timers - run all expired timers (if any) on this CPU.
* @base: the timer vector to be processed.
*
* This function cascades all vectors and executes all expired timer
* vectors.
*
* base->lock is held while the wheel is manipulated and is dropped around
* each individual timer callback.
*/
static inline void __run_timers(struct tvec_base *base)
{
struct timer_list *timer;

spin_lock_irq(&base->lock);
while (time_after_eq(jiffies, base->timer_jiffies)) {  //handle every tick from timer_jiffies up to the current jiffies
struct list_head work_list;
struct list_head *head = &work_list;
int index = base->timer_jiffies & TVR_MASK; //bucket index into tv1 for this tick

/*
* Cascade timers:
*/
if (!index &&
(!cascade(base, &base->tv2, INDEX(0))) &&               //presumably cascade() moves timers from the given vector down into the lower ones (helper not shown here)
(!cascade(base, &base->tv3, INDEX(1))) &&
!cascade(base, &base->tv4, INDEX(2)))
cascade(base, &base->tv5, INDEX(3));                    //tv5 is only cascaded when every preceding cascade() call returned 0
++base->timer_jiffies;                 //advance the processed-up-to marker past the tick being handled
list_replace_init(base->tv1.vec + index, &work_list); //move the whole tv1 bucket for this tick onto a private list, leaving the bucket empty
while (!list_empty(head)) {                           //run the handler of each timer on the private list
void (*fn)(unsigned long);
unsigned long data;

timer = list_first_entry(head, struct timer_list,entry);
/* Copy the callback and its argument before the timer is detached. */
fn = timer->function;
data = timer->data;

timer_stats_account_timer(timer);

base->running_timer = timer;
detach_timer(timer, 1);

/* The callback runs with base->lock released; the lock is retaken afterwards. */
spin_unlock_irq(&base->lock);
call_timer_fn(timer, fn, data);
spin_lock_irq(&base->lock);
}
}
base->running_timer = NULL;
spin_unlock_irq(&base->lock);
}

(3)hrtimers_init():

/*
 * Boot-time setup of the hrtimer code: issue the CPU_UP_PREPARE
 * notification for the current CPU by hand, register hrtimers_nb as a
 * CPU notifier and, when CONFIG_HIGH_RES_TIMERS is set, install the
 * HRTIMER_SOFTIRQ handler.
 */
void __init hrtimers_init(void)
{
	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
			   (void *)(long)smp_processor_id());
	register_cpu_notifier(&hrtimers_nb);

#ifdef CONFIG_HIGH_RES_TIMERS
	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
#endif
}



与 init_timers 最主要的区别是操作(3),即初始化软中断:只有在配置了 CONFIG_HIGH_RES_TIMERS 时,才会为 HRTIMER_SOFTIRQ 注册处理函数 run_hrtimer_softirq。

(4)timekeeping_init()

我们首先弄清楚两个概念:时钟源和时钟事件设备(时钟事件源)。

(1)时钟源

/**
* struct clocksource - hardware abstraction for a free running counter
*	Provides mostly state-free accessors to the underlying hardware.
*	This is the structure used for system time.
*
* @name:		ptr to clocksource name
* @list:		list head for registration
* @rating:		rating value for selection (higher is better)
*			To avoid rating inflation the following
*			list should give you a guide as to how
*			to assign your clocksource a rating
*			1-99: Unfit for real use
*				Only available for bootup and testing purposes.
*			100-199: Base level usability.
*				Functional for real use, but not desired.
*			200-299: Good.
*				A correct and usable clocksource.
*			300-399: Desired.
*				A reasonably fast and accurate clocksource.
*			400-499: Perfect
*				The ideal clocksource. A must-use where
*				available.
* @read:		returns a cycle value, passes clocksource as argument
* @enable:		optional function to enable the clocksource
* @disable:		optional function to disable the clocksource
* @mask:		bitmask for two's complement
*			subtraction of non 64 bit counters
* @mult:		cycle to nanosecond multiplier
* @shift:		cycle to nanosecond divisor (power of two)
* @max_idle_ns:	max idle time permitted by the clocksource (nsecs)
* @flags:		flags describing special properties
* @vread:		vsyscall based read
* @suspend:		suspend function for the clocksource, if necessary
* @resume:		resume function for the clocksource, if necessary
*
* NOTE(review): @cycle_last, @fsys_mmio and the watchdog members are not
* described in the list above. Their comments below are limited to what
* this listing shows.
*/
struct clocksource {
/*
* Hotpath data, fits in a single cache line when the
* clocksource itself is cacheline aligned.
*/
cycle_t (*read)(struct clocksource *cs);
cycle_t cycle_last;	/* presumably the most recently recorded cycle value - TODO confirm */
cycle_t mask;
u32 mult;
u32 shift;
u64 max_idle_ns;

/*
* CLKSRC_FSYS_MMIO_SET() stores an address in fsys_mmio on IA64 and
* expands to an empty statement on every other configuration.
*/
#ifdef CONFIG_IA64
void *fsys_mmio;        /* used by fsyscall asm code */
#define CLKSRC_FSYS_MMIO_SET(mmio, addr)      ((mmio) = (addr))
#else
#define CLKSRC_FSYS_MMIO_SET(mmio, addr)      do { } while (0)
#endif
const char *name;
struct list_head list;
int rating;
cycle_t (*vread)(void);
int (*enable)(struct clocksource *cs);
void (*disable)(struct clocksource *cs);
unsigned long flags;
void (*suspend)(struct clocksource *cs);
void (*resume)(struct clocksource *cs);

#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
/* Watchdog related data, used by the framework */
struct list_head wd_list;
cycle_t cs_last;
cycle_t wd_last;
#endif
} ____cacheline_aligned;

(2)时钟事件源

/**
* struct clock_event_device - clock event device descriptor
* @event_handler:	Assigned by the framework to be called by the low
*			level handler of the event source
* @set_next_event:	set next event function
* @next_event:		local storage for the next event in oneshot mode
* @max_delta_ns:	maximum delta value in ns
* @min_delta_ns:	minimum delta value in ns
* @mult:		nanosecond to cycles multiplier
* @shift:		nanoseconds to cycles divisor (power of two)
* @mode:		operating mode assigned by the management code
* @features:		features
* @retries:		number of forced programming retries
* @set_mode:		set mode function
* @broadcast:		function to broadcast events
* @min_delta_ticks:	minimum delta value in ticks stored for reconfiguration
* @max_delta_ticks:	maximum delta value in ticks stored for reconfiguration
* @name:		ptr to clock event name
* @rating:		variable to rate clock event devices
* @irq:		IRQ number (only for non CPU local devices)
* @cpumask:		cpumask to indicate for which CPUs this device works
* @list:		list head for the management code
*/
struct clock_event_device {
/* Event delivery and programming state. */
void			(*event_handler)(struct clock_event_device *);
int			(*set_next_event)(unsigned long evt,
struct clock_event_device *);
ktime_t			next_event;
u64			max_delta_ns;
u64			min_delta_ns;
u32			mult;
u32			shift;
enum clock_event_mode	mode;
unsigned int		features;
unsigned long		retries;

/* Broadcast / mode callbacks and the tick limits kept for reconfiguration. */
void			(*broadcast)(const struct cpumask *mask);
void			(*set_mode)(enum clock_event_mode mode,
struct clock_event_device *);
unsigned long		min_delta_ticks;
unsigned long		max_delta_ticks;

/* Identification and list linkage. */
const char		*name;
int			rating;
int			irq;
const struct cpumask	*cpumask;
struct list_head	list;
} ____cacheline_aligned;


/*
 * timekeeping_init - Initializes the clocksource and common timekeeping values
 *
 * The two clock readings are taken before xtime_lock is acquired; all
 * global timekeeping state is then written under the seqlock.
 */
void __init timekeeping_init(void)
{
	struct timespec now, boot;
	struct clocksource *clock;
	unsigned long flags;

	/* Fetch the persistent-clock (e.g. RTC) and boot-clock readings. */
	read_persistent_clock(&now);
	read_boot_clock(&boot);

	write_seqlock_irqsave(&xtime_lock, flags);

	/* Reset the NTP interface variables. */
	ntp_init();

	/* Pick the default clocksource and switch it on if it has an enable hook. */
	clock = clocksource_default_clock();
	if (clock->enable)
		clock->enable(clock);

	timekeeper_setup_internals(clock);

	/* Wall time starts from the persistent clock; raw_time starts at zero. */
	xtime.tv_sec = now.tv_sec;
	xtime.tv_nsec = now.tv_nsec;
	raw_time.tv_sec = 0;
	raw_time.tv_nsec = 0;

	/* An all-zero boot-clock reading is replaced by the wall time. */
	if (!boot.tv_sec && !boot.tv_nsec) {
		boot.tv_sec = xtime.tv_sec;
		boot.tv_nsec = xtime.tv_nsec;
	}
	set_normalized_timespec(&wall_to_monotonic,
				-boot.tv_sec, -boot.tv_nsec);

	total_sleep_time.tv_sec = 0;
	total_sleep_time.tv_nsec = 0;

	write_sequnlock_irqrestore(&xtime_lock, flags);
}

我们着重来看系统选中的默认的时钟源是什么?

/*
 * Default clocksource: the jiffies-based one. The definition is __weak,
 * so a strong definition elsewhere takes precedence over it.
 */
struct clocksource * __init __weak clocksource_default_clock(void)
{
	return &clocksource_jiffies;
}

/*
 * Jiffies-based clocksource, returned by clocksource_default_clock().
 * It carries the lowest valid rating (1), so any clocksource registered
 * with a higher rating is preferred over it.
 */
struct clocksource clocksource_jiffies = {
    .name        = "jiffies",
    .rating        = 1, /* lowest valid rating*/
    .read        = jiffies_read,
    .mask        = 0xffffffff, /*32bits*/
    .mult        = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
    .shift        = JIFFIES_SHIFT,
};


(5)time_init():
/*
 * Initialize TSC and delay the periodic timer init to
 * late x86_late_time_init() so ioremap works.
 *
 * The function itself only stores x86_late_time_init in the
 * late_time_init hook.
 */
void __init time_init(void)
{
	late_time_init = x86_late_time_init;
}

函数 x86_late_time_init 先通过 x86_init.timers.timer_init() 初始化 HPET(或 PIT),再调用 tsc_init() 初始化 TSC。在 time_init 中只是把该函数的地址赋给全局变量 late_time_init,启动流程稍后会通过这个指针调用它。

(6)late_time_init()

在 time_init() 中,我们将 late_time_init 初始化为 x86_late_time_init():

/*
 * Late timer setup. The timer_init callback ends up in hpet_time_init();
 * the TSC is initialised afterwards.
 */
static __init void x86_late_time_init(void)
{
	x86_init.timers.timer_init();
	tsc_init();
}

/*
 * Default timer init function.
 *
 * Tries to set up the high precision event timer (HPET). When that
 * fails, the programmable interval timer (PIT) is set up instead. The
 * default timer IRQ is installed in both cases.
 */
void __init hpet_time_init(void)
{
	if (hpet_enable() == 0)
		setup_pit_timer();

	setup_default_timer_irq();
}

我们来看hpet_enable(),它检测HPET是否可用,如果可用则将时钟源设置为HPET:

/**
* hpet_enable - Try to setup the HPET timer. Returns 1 on success.
*
* Returns 0 in every other case. That includes the case where the
* clocksource was registered but the HPET_ID_LEGSUP bit is not set in the
* ID register, so no legacy clock event device is registered. On the
* out_nohpet path the mapping is cleared and hpet_address is reset to 0.
*/
int __init hpet_enable(void)
{
unsigned long hpet_period;
unsigned int id;
u64 freq;
int i;

if (!is_hpet_capable()) //bail out when the HPET cannot be used
return 0;

hpet_set_mapping();     //presumably maps the HPET register space (helper not shown here)

/*
* Read the period and check for a sane value:
*/
hpet_period = hpet_readl(HPET_PERIOD); //read the HPET_PERIOD register

/*
* AMD SB700 based systems with spread spectrum enabled use a
* SMM based HPET emulation to provide proper frequency
* setting. The SMM code is initialized with the first HPET
* register access and takes some time to complete. During
* this time the config register reads 0xffffffff. We check
* for max. 1000 loops whether the config register reads a non
* 0xffffffff value to make sure that HPET is up and running
* before we go further. A counting loop is safe, as the HPET
* access takes thousands of CPU cycles. On non SB700 based
* machines this check is only done once and has no side
* effects.
*/
for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) {
if (i == 1000) {
printk(KERN_WARNING
"HPET config register value = 0xFFFFFFFF. "
"Disabling HPET\n");
goto out_nohpet;
}
}

if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
goto out_nohpet;

/*
* The period is a femto seconds value. Convert it to a
* frequency.
*/
freq = FSEC_PER_SEC;
do_div(freq, hpet_period);
hpet_freq = freq;

/*
* Read the HPET ID register to retrieve the IRQ routing
* information and the number of channels
*/
id = hpet_readl(HPET_ID);
hpet_print_config();

#ifdef CONFIG_HPET_EMULATE_RTC
/*
* The legacy routing mode needs at least two channels, tick timer
* and the rtc emulation channel.
*/
if (!(id & HPET_ID_NUMBER))
goto out_nohpet;
#endif

if (hpet_clocksource_register())  //register the HPET as a clocksource; a non-zero result disables the HPET
goto out_nohpet;

if (id & HPET_ID_LEGSUP) {
hpet_legacy_clockevent_register(); //register the HPET as a clock event device
return 1;
}
return 0;

out_nohpet:
hpet_clear_mapping();
hpet_address = 0;
return 0;
}


整个函数中最重要的两个操作:
(1)hpet_clocksource_register(),注册HPET时钟源

/*
 * Start the HPET main counter, check that it is really counting, and
 * register it as a clocksource.
 *
 * Returns 0 on success, or -ENODEV when the counter value has not
 * changed across the busy-wait.
 */
static int hpet_clocksource_register(void)
{
	u64 tsc_start, tsc_now;
	cycle_t first_count;

	/* Start the counter */
	hpet_restart_counter();

	/* Verify whether hpet counter works */
	first_count = hpet_readl(HPET_COUNTER);
	rdtscll(tsc_start);

	/*
	 * We don't know the TSC frequency yet, but waiting for
	 * 200000 TSC cycles is safe:
	 * 4 GHz == 50us
	 * 1 GHz == 200us
	 */
	do {
		rep_nop();
		rdtscll(tsc_now);
	} while ((tsc_now - tsc_start) < 200000UL);

	if (hpet_readl(HPET_COUNTER) == first_count) {
		printk(KERN_WARNING
		       "HPET counter not counting. HPET disabled\n");
		return -ENODEV;
	}

	clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
	return 0;
}


/*
 * Register @cs with its frequency given in Hz: forwards to
 * __clocksource_register_scale() with a scale factor of 1.
 */
static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
{
	return __clocksource_register_scale(cs, 1, hz);
}


/**
 * __clocksource_register_scale - Used to install new clocksources
 * @cs:		clocksource to be registered
 * @scale:	Scale factor multiplied against freq to get clocksource hz
 * @freq:	clocksource frequency (cycles per second) divided by scale
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 * NOTE(review): the body shown here only ever returns zero.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * clocksource_register_hz() or clocksource_register_khz helper functions.
 */
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
	/* Initialize mult/shift and max_idle_ns */
	__clocksource_updatefreq_scale(cs, scale, freq);

	/*
	 * Under clocksource_mutex: add the clocksource to the clocksource
	 * list, hand it to the clocksource watchdog, then re-run the
	 * selection of the system clocksource.
	 */
	mutex_lock(&clocksource_mutex);
	clocksource_enqueue(cs);
	clocksource_enqueue_watchdog(cs);
	clocksource_select();
	mutex_unlock(&clocksource_mutex);

	return 0;
}


在注册了 HPET 后,会调用 clocksource_select 重新选择系统的时钟源(注意:在 finished_booting 置位之前该函数会直接返回,此时并不会真正切换时钟源):
/**
 * clocksource_select - Select the best clocksource available
 *
 * Private function. Must hold clocksource_mutex when called.
 *
 * Select the clocksource with the best rating, or the clocksource,
 * which is selected by userspace override. Nothing happens before
 * finished_booting is set or while the clocksource list is empty.
 */
static void clocksource_select(void)
{
	struct clocksource *best, *cs;

	if (!finished_booting || list_empty(&clocksource_list))
		return;

	/* First clocksource on the list has the best rating. */
	best = list_first_entry(&clocksource_list, struct clocksource, list);

	/* Check for the override clocksource. */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (strcmp(cs->name, override_name) != 0)
			continue;

		/*
		 * Check to make sure we don't switch to a non-highres
		 * capable clocksource if the tick code is in oneshot
		 * mode (highres or nohz)
		 */
		if ((cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) ||
		    !tick_oneshot_mode_active()) {
			/* Override clocksource can be used. */
			best = cs;
		} else {
			/* Override clocksource cannot be used. */
			printk(KERN_WARNING "Override clocksource %s is not "
			       "HRT compatible. Cannot switch while in "
			       "HRT/NOHZ mode\n", cs->name);
			override_name[0] = 0;
		}
		break;
	}

	if (curr_clocksource != best) {
		printk(KERN_INFO "Switching to clocksource %s\n", best->name);
		curr_clocksource = best;
		timekeeping_notify(curr_clocksource);
	}
}

(2)在注册了 HPET 时钟源之后,还要调用 hpet_legacy_clockevent_register() 将 HPET 注册为时钟事件设备:

/*
 * Enable HPET legacy interrupts, register hpet_clockevent with the
 * clockevents framework and make it the global clock event device.
 */
static void hpet_legacy_clockevent_register(void)
{
	/* Start HPET legacy interrupts */
	hpet_enable_legacy_int();

	/*
	 * Start hpet with the boot cpu mask and make it
	 * global after the IO_APIC has been initialized.
	 */
	hpet_clockevent.cpumask = cpumask_of(smp_processor_id());

	clockevents_config_and_register(&hpet_clockevent, hpet_freq,
					HPET_MIN_PROG_DELTA, 0x7FFFFFFF);

	global_clock_event = &hpet_clockevent;
	printk(KERN_DEBUG "hpet clockevent registered\n");
}

/*
 * Read-modify-write of the HPET configuration register that sets the
 * HPET_CFG_LEGACY bit, then records the fact in hpet_legacy_int_enabled.
 */
static void hpet_enable_legacy_int(void)
{
    unsigned int cfg = hpet_readl(HPET_CFG);  //read the current value of the HPET config register

    cfg |= HPET_CFG_LEGACY;                   //set the legacy bit, keeping every other config bit unchanged
    hpet_writel(cfg, HPET_CFG);
    hpet_legacy_int_enabled = 1;
}
 

我们来看一下hpet_clockevent:

/*
 * The hpet clock event device.
 *
 * Advertises both periodic and one-shot operation and uses IRQ 0.
 */
static struct clock_event_device hpet_clockevent = {
	.name		= "hpet",
	.rating		= 50,
	.irq		= 0,
	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.set_mode	= hpet_legacy_set_mode,
	.set_next_event	= hpet_legacy_next_event,
};
继续hpet_legacy_clockevent_register:
/**
 * clockevents_config_and_register - Configure and register a clock event device
 * @dev:	device to register
 * @freq:	The clock frequency
 * @min_delta:	The minimum clock ticks to program in oneshot mode
 * @max_delta:	The maximum clock ticks to program in oneshot mode
 *
 * min/max_delta can be 0 for devices which do not support oneshot mode.
 *
 * The tick limits are stored in @dev before clockevents_config() runs,
 * because that function reads them back from the device.
 */
void clockevents_config_and_register(struct clock_event_device *dev,
				     u32 freq, unsigned long min_delta,
				     unsigned long max_delta)
{
	dev->min_delta_ticks = min_delta;
	dev->max_delta_ticks = max_delta;

	clockevents_config(dev, freq);
	clockevents_register_device(dev);
}

/*
 * Compute mult/shift and the min/max delta in nanoseconds for @dev from
 * its tick limits and @freq. Devices without CLOCK_EVT_FEAT_ONESHOT are
 * left untouched.
 */
static void clockevents_config(struct clock_event_device *dev, u32 freq)
{
    u64 max_sleep;

    if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) == 0)
        return;

    /*
     * Calculate the maximum number of seconds we can sleep. Limit
     * to 10 minutes for hardware which can program more than
     * 32bit ticks so we still get reasonable conversion values.
     */
    max_sleep = dev->max_delta_ticks;
    do_div(max_sleep, freq);

    if (max_sleep == 0)
        max_sleep = 1;
    else if (max_sleep > 600 && dev->max_delta_ticks > UINT_MAX)
        max_sleep = 600;

    clockevents_calc_mult_shift(dev, freq, max_sleep);

    dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev);
    dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev);
}

/**
 * clockevents_register_device - register a clock event device
 * @dev:    device to register
 *
 * The device must still be in CLOCK_EVT_MODE_UNUSED. A device without a
 * cpumask is bound to the current CPU, with a warning when more than
 * one CPU is possible.
 */
void clockevents_register_device(struct clock_event_device *dev)
{
    unsigned long irq_state;

    BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);

    if (!dev->cpumask) {
        WARN_ON(num_possible_cpus() > 1);
        dev->cpumask = cpumask_of(smp_processor_id());
    }

    raw_spin_lock_irqsave(&clockevents_lock, irq_state);

    /* Link the device into clockevent_devices, then announce it. */
    list_add(&dev->list, &clockevent_devices);
    clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
    clockevents_notify_released();

    raw_spin_unlock_irqrestore(&clockevents_lock, irq_state);
}

/*We have converted clockevent_devices to store all active devices, and
*clockevents_released to store all fail-to-add/replace-out devices.
*/

/*
 * Called after a notify add to make devices available which were
 * released from the notifier call.
 *
 * Drains clockevents_released: each device is moved onto
 * clockevent_devices and announced again with CLOCK_EVT_NOTIFY_ADD.
 */
static void clockevents_notify_released(void)
{
    struct clock_event_device *dev;

    /* The list is re-checked on every pass; entries are always taken from the head. */
    while (!list_empty(&clockevents_released)) {
        dev = list_entry(clockevents_released.next,
                 struct clock_event_device, list);
        list_del(&dev->list);
        list_add(&dev->list, &clockevent_devices);
        clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
    }
}

 
这里,我们需要注意 hpet_clockevent 挂到 clockevent_devices 上的方式。我们看到,clockevent_devices 只不过是普通的 list_head 结构,而非想象中的由 clock_event_device 组成的链表;在向 clockevent_devices 上添加元素时,只是将该元素的 list 字段链入到 clockevent_devices 上即可,这是内核链表结构的精妙之处!

到现在,我们已经将hpet时钟源和时钟事件源的注册工作完成了!

如果 HPET 是不可用的,那么 hpet_time_init 会调用 setup_pit_timer():

/*
 * Initialize the conversion factor and the min/max deltas of the clock event
 * structure and register the clock event source with the framework.
 * The PIT then becomes the global clock event device.
 */
void __init setup_pit_timer(void)
{
	/*
	 * Start pit with the boot cpu mask and make it global after the
	 * IO_APIC has been initialized.
	 */
	pit_ce.cpumask = cpumask_of(smp_processor_id());

	clockevents_config_and_register(&pit_ce, CLOCK_TICK_RATE, 0xF, 0x7FFF);

	global_clock_event = &pit_ce;
}
首先来看一下pit_ce:
/* Clock event device for the PIT; this listing advertises periodic mode only. */
static struct clock_event_device pit_ce = {
	.name		= "pit",
	.shift		= 32,
	.features	= CLOCK_EVT_FEAT_PERIODIC,
	.set_mode	= pit_set_mode,
	.set_next_event	= pit_set_next_event,
};

由于PIT是作为默认时钟源的,因此在 setup_pit_timer 中我们只需要将其注册为时钟事件源即可。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息