您的位置:首页 > 运维架构 > Linux

Linux时间子系统(三) -- clockevent, tick device

2017-08-23 22:18 330 查看
转载请标明出处floater的csdn blog,http://blog.csdn.net/flaoter

1 clockevent

clockevent是具有编程和事件产生能力的定时器,在我使用的ARM SOC平台上底层依赖的硬件是ARM Generic Timer中的Timers。当然,在SOC系统中应该还会有其它的timer可以使用,但是本文的介绍只限于arm的timer。

1.1 数据结构

/*
 * struct clock_event_device - a timer that can be programmed to raise events.
 * One instance describes one event-capable hardware timer.
 */
struct clock_event_device {
void            (*event_handler)(struct clock_event_device *); // callback invoked when the event fires
int         (*set_next_event)(unsigned long evt,
struct clock_event_device *);  // program the next event, expressed in cycles
int         (*set_next_ktime)(ktime_t expires,
struct clock_event_device *);  // program the next event, expressed as ktime
ktime_t         next_event;
u64         max_delta_ns;  // maximum programmable delta, in ns
u64         min_delta_ns;  // minimum programmable delta, in ns
u32         mult;
u32         shift;   // mult/shift: conversion factors between ns and cycles
enum clock_event_mode   mode;  // current operating mode; changed through set_mode()
unsigned int        features;  // CLOCK_EVT_FEAT_* capability flags
unsigned long       retries;

void            (*broadcast)(const struct cpumask *mask);
void            (*set_mode)(enum clock_event_mode mode,
struct clock_event_device *);
void            (*suspend)(struct clock_event_device *);
void            (*resume)(struct clock_event_device *);
unsigned long       min_delta_ticks;  // filled in by clockevents_config_and_register()
unsigned long       max_delta_ticks;  // filled in by clockevents_config_and_register()

const char      *name;
int         rating;  // NOTE(review): presumably a quality score used when choosing between devices -- confirm
int         irq;
int         bound_on;
const struct cpumask    *cpumask;  // CPUs this device serves
struct list_head    list;  // links the device into clockevent_devices / clockevents_released
struct module       *owner;
} ____cacheline_aligned;


• mode

这个成员是说明clockevent的工作模式,具体的mode设定是由set_mode这个callback函数来完成的。

/*
 * Operating modes of a clock event device. The mode stored in
 * clock_event_device.mode is switched through the set_mode() callback.
 */
enum clock_event_mode {
CLOCK_EVT_MODE_UNUSED = 0,
CLOCK_EVT_MODE_SHUTDOWN,
CLOCK_EVT_MODE_PERIODIC,
CLOCK_EVT_MODE_ONESHOT,
CLOCK_EVT_MODE_RESUME,
};


• features

说明clockevent设备的特征。CLOCK_EVT_FEAT_PERIODIC说明该硬件timer可以产生周期性的clock event,CLOCK_EVT_FEAT_ONESHOT说明该硬件timer可以产生单触发的clock event。不要将features和mode的使用场景混淆。

/* Capability flags stored in clock_event_device.features. */
#define CLOCK_EVT_FEAT_PERIODIC     0x000001  // device can generate periodic events
#define CLOCK_EVT_FEAT_ONESHOT      0x000002  // device can generate one-shot events
#define CLOCK_EVT_FEAT_KTIME        0x000004  // events are programmed in ktime rather than in cycles


• list

内核使用如下两个链表来管理系统中的clock_event_device。clockevent_devices list中是当前active的device。

/* Clock event devices that are currently active. */
static LIST_HEAD(clockevent_devices);
/* Devices taken off clockevent_devices after being replaced by another device. */
static LIST_HEAD(clockevents_released);


1.2 clockevent的建立过程

这里我先对clockevent的注册函数进行介绍,再对在ARM SOC平台调用它的流程进行说明。

1.2.1 clock_event_device的注册

注册函数如下,

/*
 * Register a clock event device: put it on the clockevent_devices list and
 * hand it to the tick layer, all under clockevents_lock.
 * The device must still be in CLOCK_EVT_MODE_UNUSED.
 */
void clockevents_register_device(struct clock_event_device *dev)
{
unsigned long flags;

BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
// No cpumask supplied: default to the current CPU, warning if more than one CPU is possible.
if (!dev->cpumask) {
WARN_ON(num_possible_cpus() > 1);
dev->cpumask = cpumask_of(smp_processor_id());
}

raw_spin_lock_irqsave(&clockevents_lock, flags);

list_add(&dev->list, &clockevent_devices);  // add this device to the clockevent_devices list
tick_check_new_device(dev);  // notify the tick device layer; replacing the current clockevent happens in there too
clockevents_notify_released(); // walks clockevents_released and adds its entries back to clockevent_devices (body not shown here)

raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}


tick_check_new_device是tick device层的函数,在此只列出它调用的clockevents_exchange_device,clockevents_exchange_device函数不再展开了,它实现将curdev从clockevent_devices list中删除,添加到clockevents_released list中。

/* Abridged view of tick_check_new_device(): only the device exchange is shown. */
void tick_check_new_device(struct clock_event_device *newdev)
{
...
// several eligibility checks come first; they are covered in a later section
clockevents_exchange_device(curdev, newdev); // removes curdev from clockevent_devices and adds it to clockevents_released
tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
...
}


1.2.2 ARM SOC平台上clockevent的注册

在上一节介绍clocksource的注册时提到过在内核启动阶段的time_init函数的clocksource_of_init中,会对段__clksrc_of_table进行解析,armv8_arch_timer的注册函数arch_timer_init会被调用,在本小节会对此函数进行详解。

在解析此函数之前,先看看dts中关于此timer的定义,

/* Device-tree node describing the ARMv8 architected timer. */
timer {
compatible = "arm,armv8-timer";
/*
 * Four per-cpu interrupts (PPIs). The driver maps them by index in
 * enum ppi_nr order: secure physical, non-secure physical, virtual,
 * hypervisor.
 */
interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(8)
| IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(8)
| IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(8)
| IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(8)
| IRQ_TYPE_LEVEL_LOW)>;
/* NOTE(review): presumably the timer input clock in Hz (26 MHz), read by arch_timer_detect_rate() -- confirm */
clock-frequency = <26000000>;
};


在时间子系统(一)中曾经对ARM Generic Timer进行过说明,每个processor都有如下四个timer, 并且由于它们都是cpu私有的,所以产生的中断都是PPI类型的。

• A Non-secure EL1 physical timer.

• A Secure EL1 physical timer.

• A Non-secure EL2 physical timer.

• A virtual timer.

关于这几个中断,在内核中有如下枚举描述。

/*
 * Indices into arch_timer_ppi[]. arch_timer_init() maps interrupt number i
 * of the device-tree node to slot i, so this order must match the order of
 * the "interrupts" entries.
 */
enum ppi_nr {
PHYS_SECURE_PPI,
PHYS_NONSECURE_PPI,
VIRT_PPI,
HYP_PPI,
MAX_TIMER_PPI  // number of slots, used as the loop bound
};


回顾了这些知识后,再对arch_timer_init进行解析。

/*
 * Init entry for the CP15-accessed architected timer: map the PPIs from the
 * device-tree node, decide between virtual and physical timer, then register
 * the timer. Bails out if a CP15 timer node was already handled.
 */
static void __init arch_timer_init(struct device_node *np)
{
int i;

if (arch_timers_present & ARCH_CP15_TIMER) {
pr_warn("arch_timer: multiple nodes in dt, skipping\n");
return;
}

arch_timers_present |= ARCH_CP15_TIMER;  // timer is accessed through CP15
for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++)
arch_timer_ppi[i] = irq_of_parse_and_map(np, i);  // parse the DT node and map an irq number for each PPI
arch_timer_detect_rate(NULL, np);  // determine the timer clock frequency

/*
* If HYP mode is available, we know that the physical timer
* has been configured to be accessible from PL1. Use it, so
* that a guest can use the virtual timer instead.
*
* If no interrupt provided for virtual timer, we'll have to
* stick to the physical timer. It'd better be accessible...
*/
// Use the physical timer when HYP mode is available or no irq was mapped for the virtual timer.
// With HYP mode the virtual timer is left to guest OSes, so we stay on the physical one.
if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) {
arch_timer_use_virtual = false;

if (!arch_timer_ppi[PHYS_SECURE_PPI] ||
!arch_timer_ppi[PHYS_NONSECURE_PPI]) {
pr_warn("arch_timer: No interrupt available, giving up\n");
return;
}
}

arch_timer_c3stop = !of_property_read_bool(np, "always-on");

arch_timer_register(); // register the arch timer
arch_timer_common_init(); // described in the earlier clocksource article
}


请注意下面arch_timer_register中使用的是percpu类型的变量,clock_event_device是percpu的资源。

/*
 * arch_timer_register() - set up the per-cpu architected timer clockevent.
 *
 * Allocates the per-cpu clock_event_device storage, requests the timer
 * PPI(s), registers the CPU notifier and CPU PM hooks, and finally sets up
 * the timer on the boot CPU.
 *
 * Returns 0 on success, or the error code of the step that failed. On
 * failure everything acquired so far is released in reverse order through
 * the out_* labels at the end of the function.
 */
static int __init arch_timer_register(void)
{
	int err;
	int ppi;

	/* per-cpu storage: every CPU gets its own clock_event_device */
	arch_timer_evt = alloc_percpu(struct clock_event_device);
	if (!arch_timer_evt) {
		err = -ENOMEM;
		goto out;
	}

	if (arch_timer_use_virtual) {
		ppi = arch_timer_ppi[VIRT_PPI];
		err = request_percpu_irq(ppi, arch_timer_handler_virt,
					 "arch_timer", arch_timer_evt);
	} else {
		/*
		 * Physical timer: both the secure and the non-secure
		 * physical timer PPI have to be requested.
		 */
		ppi = arch_timer_ppi[PHYS_SECURE_PPI];
		err = request_percpu_irq(ppi, arch_timer_handler_phys,
					 "arch_timer", arch_timer_evt);
		if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) {
			ppi = arch_timer_ppi[PHYS_NONSECURE_PPI];
			err = request_percpu_irq(ppi, arch_timer_handler_phys,
						 "arch_timer", arch_timer_evt);
			/*
			 * The second request failed: drop the first one here,
			 * because the out_free path below frees no IRQs.
			 */
			if (err)
				free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI],
						arch_timer_evt);
		}
	}

	if (err) {
		pr_err("arch_timer: can't register interrupt %d (%d)\n",
		       ppi, err);
		goto out_free;
	}

	err = register_cpu_notifier(&arch_timer_cpu_nb);
	if (err)
		goto out_free_irq;

	err = arch_timer_cpu_pm_init();
	if (err)
		goto out_unreg_notify;

	/* Immediately configure the timer on the boot CPU */
	arch_timer_setup(this_cpu_ptr(arch_timer_evt));   /* registers the clock event device */

	return 0;

	/* Error unwinding: each label undoes one step, newest first. */
out_unreg_notify:
	unregister_cpu_notifier(&arch_timer_cpu_nb);
out_free_irq:
	if (arch_timer_use_virtual)
		free_percpu_irq(arch_timer_ppi[VIRT_PPI], arch_timer_evt);
	else {
		free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI],
				arch_timer_evt);
		if (arch_timer_ppi[PHYS_NONSECURE_PPI])
			free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI],
					arch_timer_evt);
	}

out_free:
	free_percpu(arch_timer_evt);
out:
	return err;
}


平台的percpu timer硬件寄存器都是通过CP15方式来访问。

/*
 * Per-cpu setup: initialise and register this CPU's clock_event_device,
 * then enable the timer PPI(s) that are in use. Always returns 0.
 */
static int arch_timer_setup(struct clock_event_device *clk)
{
__arch_timer_setup(ARCH_CP15_TIMER, clk);  // timer registers are accessed through coprocessor CP15

if (arch_timer_use_virtual)
enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0);
else {
enable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], 0);  // enable the interrupt
if (arch_timer_ppi[PHYS_NONSECURE_PPI])
enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0);
}

arch_counter_set_user_access();
if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM))
arch_timer_configure_evtstream();

return 0;
}

/*
 * Fill in a clock_event_device for the given timer type, shut the timer
 * down and register the device. Only CLOCK_EVT_FEAT_ONESHOT is advertised;
 * CLOCK_EVT_FEAT_PERIODIC is never set here.
 */
static void __arch_timer_setup(unsigned type,
struct clock_event_device *clk)
{
// fill in the clock_event_device fields
clk->features = CLOCK_EVT_FEAT_ONESHOT;

if (type == ARCH_CP15_TIMER) {
if (arch_timer_c3stop)
clk->features |= CLOCK_EVT_FEAT_C3STOP;
clk->name = "arch_sys_timer";
clk->rating = 450;
clk->cpumask = cpumask_of(smp_processor_id());  // serves only the current CPU
if (arch_timer_use_virtual) {
clk->irq = arch_timer_ppi[VIRT_PPI];
clk->set_mode = arch_timer_set_mode_virt;
clk->set_next_event = arch_timer_set_next_event_virt;
} else {
clk->irq = arch_timer_ppi[PHYS_SECURE_PPI];
clk->set_mode = arch_timer_set_mode_phys;
clk->set_next_event = arch_timer_set_next_event_phys;
}
} else {
// NOTE(review): presumably the memory-mapped timer variant, judging by the names -- confirm
clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
clk->name = "arch_mem_timer";
clk->rating = 400;
clk->cpumask = cpu_all_mask;
if (arch_timer_mem_use_virtual) {
clk->set_mode = arch_timer_set_mode_virt_mem;
clk->set_next_event =
arch_timer_set_next_event_virt_mem;
} else {
clk->set_mode = arch_timer_set_mode_phys_mem;
clk->set_next_event =
arch_timer_set_next_event_phys_mem;
}
}

clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, clk);

// register the clock_event_device; 0xf and 0x7fffffff are the min/max delta in ticks
clockevents_config_and_register(clk, arch_timer_rate, 0xf, 0x7fffffff);
}


通过__arch_timer_setup函数可见arch-timer的feature并不支持CLOCK_EVT_FEAT_PERIODIC。

/*
 * Store the tick limits in the device, configure it for the given
 * frequency, then register it.
 *
 * dev:       device to configure and register
 * freq:      frequency passed on to clockevents_config()
 * min_delta: stored as dev->min_delta_ticks
 * max_delta: stored as dev->max_delta_ticks
 */
void clockevents_config_and_register(struct clock_event_device *dev,
u32 freq, unsigned long min_delta,
unsigned long max_delta)
{
dev->min_delta_ticks = min_delta;
dev->max_delta_ticks = max_delta;
clockevents_config(dev, freq);
clockevents_register_device(dev);
}


clockevents_register_device已经在1.2.1节中进行了解析。至此,clockevent的注册过程就完成了。

内核中还为应用层提供了sysfs接口,实现过程不再描述了,可以通过如下接口查看。

cat /sys/devices/system/clockevents/clockevent0/current_device
arch_sys_timer
cat /sys/devices/system/clockevents/clockevent1/current_device
arch_sys_timer


此外,除了cpu core上的clockevent设备外,kernel中还有broadcast的clockevent注册,在此处也不说明了。

2 tick device

2.1 数据结构

struct tick_device只是对struct clock_event_device的一个封装,加入了运行模式变量,支持PERIODIC和ONESHOT两种模式。

/* Thin wrapper around a clock_event_device that adds the tick operating mode. */
struct tick_device {
struct clock_event_device *evtdev;  // underlying clock event device
enum tick_device_mode mode;  // periodic or one-shot tick
};
/*
 * Operating mode of a tick device. This is separate from the
 * CLOCK_EVT_FEAT_* capability flags of the underlying clockevent.
 */
enum tick_device_mode {
TICKDEV_MODE_PERIODIC,
TICKDEV_MODE_ONESHOT,
};


请注意此处的TICKDEV_MODE_PERIODIC与clock_event_device的成员feature CLOCK_EVT_FEAT_PERIODIC不要一起理解,即使是CLOCK_EVT_FEAT_ONESHOT的clockevent也可以支持TICKDEV_MODE_PERIODIC模式的tick device。

2.2 tickdevice的建立过程

在clock_event_device的注册过程中会调用tick_check_new_device通知tick device层进行处理,上文中只介绍了更新clockevent,此处对检查处理和tick device设备的创建进行说明。

此函数中的条件判断很多,一些场景在我使用的平台并没有出现,所以我只能按照代码进行理解了。

/*
 * tick_check_new_device() - decide whether a newly registered clock event
 * device should become the tick device of the current CPU.
 *
 * If the device serves this CPU, passes the per-cpu check and is preferred
 * over the current one, it replaces the current device and the tick device
 * is set up on top of it. In every other case the device is passed on to
 * the broadcast layer through the out_bc path.
 */
void tick_check_new_device(struct clock_event_device *newdev)
{
	struct clock_event_device *curdev;
	struct tick_device *td;
	int cpu;

	cpu = smp_processor_id();	/* id of the local CPU */
	if (!cpumask_test_cpu(cpu, newdev->cpumask))	/* does it serve this CPU? */
		goto out_bc;

	td = &per_cpu(tick_cpu_device, cpu);	/* tick device of this CPU */
	curdev = td->evtdev;

	/* cpu local device ? */
	if (!tick_check_percpu(curdev, newdev, cpu))
		goto out_bc;

	/* Preference decision */
	/*
	 * Based on one-shot capability and rating, decide whether the new
	 * device should replace the current one.
	 */
	if (!tick_check_preferred(curdev, newdev))
		goto out_bc;

	if (!try_module_get(newdev->owner))
		return;

	/*
	 * Replace the eventually existing device by the new
	 * device. If the current device is the broadcast device, do
	 * not give it back to the clockevents layer !
	 */
	if (tick_is_broadcast_device(curdev)) {
		clockevents_shutdown(curdev);
		curdev = NULL;
	}
	clockevents_exchange_device(curdev, newdev);	/* swap in the new clockevent */
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify();
	return;

out_bc:
	/*
	 * Can the new device be used as a broadcast device ?
	 */
	tick_install_broadcast_device(newdev);
}


根据tick_device_mode会建立周期性的或单触发的tick_device。如果是第一次setup,只能建立周期模式的tick device。

/*
 * Attach newdev to the tick device td and start it in td's mode.
 * The first setup on a CPU always starts in periodic mode; on a later
 * replacement the previous handler and next_event are carried over for
 * the one-shot case.
 */
static void tick_setup_device(struct tick_device *td,
struct clock_event_device *newdev, int cpu,
const struct cpumask *cpumask)
{
ktime_t next_event;
void (*handler)(struct clock_event_device *) = NULL;

/*
* First device setup ?
*/
if (!td->evtdev) {   // first tick device registration on this CPU
/*
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {  // one CPU's tick device has to maintain global time state such as jiffies
if (!tick_nohz_full_cpu(cpu))
tick_do_timer_cpu = cpu;
else
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
tick_next_period = ktime_get();
tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
}

/*
* Startup in periodic mode first.
*/
td->mode = TICKDEV_MODE_PERIODIC;  // the first tick device on a CPU defaults to a periodic tick
} else {
// Replacing an existing device: remember its handler and expiry, then neutralise it.
handler = td->evtdev->event_handler;
next_event = td->evtdev->next_event;
td->evtdev->event_handler = clockevents_handle_noop;
}

td->evtdev = newdev;  // attach the new clock_event_device to the tick_device

/*
* When the device is not per cpu, pin the interrupt to the
* current cpu:
*/
if (!cpumask_equal(newdev->cpumask, cpumask))
irq_set_affinity(newdev->irq, cpumask);

/*
* When global broadcasting is active, check if the current
* device is registered as a placeholder for broadcast mode.
* This allows us to handle this x86 misfeature in a generic
* way. This function also returns !=0 when we keep the
* current active broadcast state for this CPU.
*/
if (tick_device_uses_broadcast(newdev, cpu))
return;

if (td->mode == TICKDEV_MODE_PERIODIC)
tick_setup_periodic(newdev, 0);
else
tick_setup_oneshot(newdev, handler, next_event);
}


配置周期性的tick_device,需要调用tick_setup_periodic。前面说过cpu第一次设置tick_device时默认配置成周期触发,所以启动阶段每个cpu都会调用tick_setup_periodic。

/*
 * Configure dev to deliver a periodic tick. Hardware with periodic support
 * is switched to periodic mode; otherwise the device is put in one-shot
 * mode and programmed for the next tick period.
 */
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
tick_set_periodic_handler(dev, broadcast);   // (1) install the periodic event handler

/* Broadcast setup ? */
if (!tick_device_is_functional(dev))
return;

if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
!tick_broadcast_oneshot_active()) {     // (2) device supports periodic events: just switch its mode
clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
} else {    // (3) no periodic support: emulate it with one-shot programming
unsigned long seq;
ktime_t next;

// Take a consistent snapshot of tick_next_period under the jiffies seqlock.
do {
seq = read_seqbegin(&jiffies_lock);
next = tick_next_period;
} while (read_seqretry(&jiffies_lock, seq));

clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);

// Retry with the following period until programming succeeds (returns 0).
for (;;) {
if (!clockevents_program_event(dev, next, false))
return;
next = ktime_add(next, tick_period);
}
}
}


(1)设置event_handler=tick_handle_periodic

(2)clock_event_device支持周期触发,只需要设置clock_event_device的模式为周期触发

(3)clock_event_device不支持周期触发,将clock_event_device设置为单触发模式,并使用clockevents_program_event编程设置下一事件。

此后每次clockevent事件发生时都会调用tick_handle_periodic。

周期性tick的clock event handler的处理函数tick_handle_periodic分析如下,

/*
 * Event handler for the periodic tick: run the periodic work and, for
 * devices operated in one-shot mode, program the next tick by hand.
 */
void tick_handle_periodic(struct clock_event_device *dev)
{
int cpu = smp_processor_id();
ktime_t next = dev->next_event;

tick_periodic(cpu);  // (1) periodic tick work

if (dev->mode != CLOCK_EVT_MODE_ONESHOT)  // device is not in one-shot mode: nothing to reprogram
return;
for (;;) {  // one-shot device: program the next event with clockevents_program_event(), as done at setup time
/*
* Setup the next period for devices, which do not have
* periodic mode:
*/
next = ktime_add(next, tick_period);

if (!clockevents_program_event(dev, next, false))
return;
/*
* Have to be careful here. If we're in oneshot mode,
* before we call tick_periodic() in a loop, we need
* to be sure we're using a real hardware clocksource.
* Otherwise we could get trapped in an infinite
* loop, as the tick_periodic() increments jiffies,
* which then will increment time, possibly causing
* the loop to trigger again and again.
*/
if (timekeeping_valid_for_hres())
tick_periodic(cpu);
}
}


tick_periodic会处理全局的时间信息更新任务和本地cpu上的进程时间信息。处理全局时间jiffies时,它需要选用一个全局的tick device来执行。

/*
 * Work done on every periodic tick. The CPU recorded in tick_do_timer_cpu
 * also advances the global time state; every CPU updates its local process
 * accounting and profiling.
 */
static void tick_periodic(int cpu)
{
if (tick_do_timer_cpu == cpu) {   // this CPU is responsible for the global time update
write_seqlock(&jiffies_lock);

/* Keep track of the next tick event */
tick_next_period = ktime_add(tick_next_period, tick_period);

do_timer(1);  // advance jiffies
write_sequnlock(&jiffies_lock);
update_wall_time(); // update wall-clock time
}

update_process_times(user_mode(get_irq_regs()));  // update process time accounting and run the periodic scheduler (scheduler_tick)
profile_tick(CPU_PROFILING);
}


到此介绍了每个cpu的tick_device的注册,启动阶段tick_device工作在周期触发模式,并且它对应的event_handler为tick_handle_periodic。在高分辨率时钟(CONFIG_HIGH_RES_TIMERS)和动态时钟(CONFIG_NO_HZ)特性开启后还会有变化。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: